VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@105290

Last change on this file since 105290 was 105271, checked in by vboxsync, 7 months ago

VMM/IEM: Replaced IEMNATIVEEXITREASON with IEMNATIVELABELTYPE, since it's always been a super set of it. Some source code width adjustments. bugref:10677

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 491.4 KB
1/* $Id: IEMAllN8veRecompFuncs.h 105271 2024-07-11 10:30:56Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused configs.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81
82
83/*********************************************************************************************************************************
84* Code emitters for flushing pending guest register writes and sanity checks *
85*********************************************************************************************************************************/
86
87#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
88# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
89DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
90{
91 /* Compare the shadow with the context value; they should match. */
92 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
93 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
94 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
95 return off;
96}
97# endif
98#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
99
100/**
101 * Flushes delayed write of a specific guest register.
102 *
103 * This must be called prior to calling CImpl functions and any helpers that use
104 * the guest state (like raising exceptions) and such.
105 *
106 * This optimization has not yet been implemented. The first target would be
107 * RIP updates, since these are the most common ones.
108 */
109DECL_INLINE_THROW(uint32_t)
110iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
111{
112#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
113 /* If, for whatever reason, it is possible to reference the PC register at some point, we need to do the writeback here first. */
114#endif
115
116#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
117#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
118 if ( enmClass == kIemNativeGstRegRef_EFlags
119 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
120 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
121#else
122 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
123#endif
124
125 if ( enmClass == kIemNativeGstRegRef_Gpr
126 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
127 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
128#endif
129
130#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
131 if ( enmClass == kIemNativeGstRegRef_XReg
132 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
133 {
134 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
135 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
136 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
137
138 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
139 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
140 }
141#endif
142 RT_NOREF(pReNative, enmClass, idxReg);
143 return off;
144}
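/*
 * Usage sketch (illustrative; the caller context and iGReg below are assumed,
 * not taken from this file): an emitter that is about to hand out a
 * by-reference pointer to a guest GPR would flush any delayed write to that
 * register first, so the C code sees the up-to-date value:
 *
 *     off = iemNativeRegFlushPendingSpecificWrite(pReNative, off,
 *                                                 kIemNativeGstRegRef_Gpr, iGReg);
 *     // ... now safe to let a C helper access the guest GPR directly ...
 */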
145
146
147
148/*********************************************************************************************************************************
149* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
150*********************************************************************************************************************************/
151
152#undef IEM_MC_BEGIN /* unused */
153#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
154 { \
155 Assert(pReNative->Core.bmVars == 0); \
156 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
157 Assert(pReNative->Core.bmStack == 0); \
158 pReNative->fMc = (a_fMcFlags); \
159 pReNative->fCImpl = (a_fCImplFlags); \
160 pReNative->cArgsX = (a_cArgsIncludingHidden)
161
162/** We have to get to the end in recompilation mode, as otherwise we won't
163 * generate code for all the IEM_MC_IF_XXX branches. */
164#define IEM_MC_END() \
165 iemNativeVarFreeAll(pReNative); \
166 } return off
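/*
 * Expansion sketch: roughly what a generated emitter body looks like once
 * IEM_MC_BEGIN_EX / IEM_MC_END have been expanded. The zero flag values are
 * placeholders for the instruction's actual a_fMcFlags / a_fCImplFlags /
 * a_cArgsIncludingHidden; real emitters also contain the per-instruction
 * IEM_MC_* body.
 *
 *     {
 *         Assert(pReNative->Core.bmVars == 0);
 *         Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
 *         Assert(pReNative->Core.bmStack == 0);
 *         pReNative->fMc    = 0;      // a_fMcFlags
 *         pReNative->fCImpl = 0;      // a_fCImplFlags
 *         pReNative->cArgsX = 0;      // a_cArgsIncludingHidden
 *         // ... emitters for the instruction body ...
 *         iemNativeVarFreeAll(pReNative);
 *     } return off;
 */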
167
168
169
170/*********************************************************************************************************************************
171* Native Emitter Support. *
172*********************************************************************************************************************************/
173
174#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
175
176#define IEM_MC_NATIVE_ELSE() } else {
177
178#define IEM_MC_NATIVE_ENDIF() } ((void)0)
179
180
181#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
182 off = a_fnEmitter(pReNative, off)
183
184#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
185 off = a_fnEmitter(pReNative, off, (a0))
186
187#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
188 off = a_fnEmitter(pReNative, off, (a0), (a1))
189
190#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
192
193#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
194 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
195
196#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
197 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
198
199#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
200 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
201
202#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
203 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
204
205#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
206 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
207
208
209#ifndef RT_ARCH_AMD64
210# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
211#else
212/** @note This is a naive approach that ASSUMES that the register isn't
213 * allocated, so it only works safely for the first allocation(s) in
214 * an MC block. */
215# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
216 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
217
218DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off);
219
220DECL_INLINE_THROW(uint32_t)
221iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
222{
223 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
224 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
225 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
226
227# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
228 /* Must flush the register if it holds pending writes. */
229 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
230 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
231 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
232# endif
233
234 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off);
235 return off;
236}
237
238#endif /* RT_ARCH_AMD64 */
239
240
241
242/*********************************************************************************************************************************
243* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
244*********************************************************************************************************************************/
245
246#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
247 pReNative->fMc = 0; \
248 pReNative->fCImpl = (a_fFlags); \
249 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
250 a_cbInstr) /** @todo not used ... */
251
252
253#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
254 pReNative->fMc = 0; \
255 pReNative->fCImpl = (a_fFlags); \
256 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
257
258DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
259 uint8_t idxInstr, uint64_t a_fGstShwFlush,
260 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
261{
262 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
263}
264
265
266#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
267 pReNative->fMc = 0; \
268 pReNative->fCImpl = (a_fFlags); \
269 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
270 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
271
272DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
273 uint8_t idxInstr, uint64_t a_fGstShwFlush,
274 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
275{
276 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
277}
278
279
280#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
281 pReNative->fMc = 0; \
282 pReNative->fCImpl = (a_fFlags); \
283 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
284 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
285
286DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
287 uint8_t idxInstr, uint64_t a_fGstShwFlush,
288 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
289 uint64_t uArg2)
290{
291 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
292}
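/*
 * Expansion sketch for a deferral, using IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED
 * as the example. The flag value, flush mask and C-impl function below are
 * placeholders picked for illustration, not values used by any particular
 * instruction.
 *
 *     pReNative->fMc    = 0;
 *     pReNative->fCImpl = 0;                                        // a_fFlags of the instruction
 *     return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr,
 *                                    RT_BIT_64(kIemNativeGstReg_EFlags),  // a_fGstShwFlush
 *                                    (uintptr_t)iemCImpl_placeholder,     // a_pfnCImpl (hypothetical)
 *                                    cbInstr, uArg0);
 */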
293
294
295
296/*********************************************************************************************************************************
297* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
298*********************************************************************************************************************************/
299
300/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
301 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
302DECL_INLINE_THROW(uint32_t)
303iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
304{
305 /*
306 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
307 * return with a special status code and make the execution loop deal with
308 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
309 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
310 * could continue w/o interruption, it probably will drop into the
311 * debugger, so not worth the effort of trying to service it here and we
312 * just lump it in with the handling of the others.
313 *
314 * To simplify the code and the register state management even more (wrt
315 * immediate in AND operation), we always update the flags and skip the
316 * extra conditional jump associated with the check.
317 */
318 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
319 <= UINT32_MAX);
320#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
321 AssertMsg( pReNative->idxCurCall == 0
322 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
323 IEMLIVENESSBIT_IDX_EFL_OTHER)),
324 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
325 IEMLIVENESSBIT_IDX_EFL_OTHER)));
326#endif
327
328 /*
329 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
330 * any pending register writes must be flushed.
331 */
332 off = iemNativeRegFlushPendingWrites(pReNative, off);
333
334 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
335 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
336 true /*fSkipLivenessAssert*/);
337 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxEflReg,
338 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
339 kIemNativeLabelType_ReturnWithFlags);
340 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
341 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
342
343 /* Free but don't flush the EFLAGS register. */
344 iemNativeRegFreeTmp(pReNative, idxEflReg);
345
346 return off;
347}
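/*
 * What the check above emits, as C-like pseudo code (register allocation and
 * exact instruction encodings omitted):
 *
 *     fEfl = cpum.GstCtx.eflags;                           // via shadowed host register
 *     if (fEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
 *         goto ReturnWithFlags;                            // rare path: exit the TB
 *     fEfl &= ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
 *     cpum.GstCtx.eflags = fEfl;                           // unconditional write-back
 */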
348
349
350/** Helper for iemNativeEmitFinishInstructionWithStatus. */
351DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
352{
353 unsigned const offOpcodes = pCallEntry->offOpcode;
354 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
355 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
356 {
357 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
358 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
359 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
360 }
361 AssertFailedReturn(NIL_RTGCPHYS);
362}
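/*
 * Worked example (numbers made up): with pTb->aRanges[0] covering offOpcodes
 * 0..4 at physical page offset 0xff0 and pTb->aRanges[1] covering offOpcodes
 * 5..7 at offset 0x000 of the following page, a call entry with offOpcode = 6
 * falls into range 1 with offRange = 1, so the function returns
 * iemTbGetRangePhysPageAddr(pTb, 1) + 1 + 0x000.
 */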
363
364
365/** The VINF_SUCCESS dummy. */
366template<int const a_rcNormal, bool const a_fIsJump>
367DECL_FORCE_INLINE_THROW(uint32_t)
368iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
369 int32_t const offJump)
370{
371 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
372 if (a_rcNormal != VINF_SUCCESS)
373 {
374#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
375 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
376#else
377 RT_NOREF_PV(pCallEntry);
378#endif
379
380 /* As this code returns from the TB any pending register writes must be flushed. */
381 off = iemNativeRegFlushPendingWrites(pReNative, off);
382
383 /*
384 * Use the lookup table for getting to the next TB quickly.
385 * Note! In this code path there can only be one entry at present.
386 */
387 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
388 PCIEMTB const pTbOrg = pReNative->pTbOrg;
389 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
390 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
391
392#if 0
393 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
394 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
395 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
396 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
397 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
398
399 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
400
401#else
402 /* Load the index as argument #1 for the helper call at the given label. */
403 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
404
405 /*
406 * Figure out the physical address of the current instruction and see
407 * whether the next instruction we're about to execute is in the same
408 * page, so we can optimistically skip TLB loading.
409 *
410 * - This is safe for all cases in FLAT mode.
411 * - In segmented modes it is complicated, given that a negative
412 * jump may underflow EIP and a forward jump may overflow or run into
413 * CS.LIM and trigger a #GP. The only thing we can get away with
414 * now at compile time is forward jumps w/o CS.LIM checks, since the
415 * lack of CS.LIM checks means we're good for the entire physical page
416 * we're executing on and another 15 bytes before we run into CS.LIM.
417 */
418 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
419# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
420 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
421# endif
422 )
423 {
424 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
425 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
426 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
427 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
428
429 {
430 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
431 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
432
433 /* Load the key lookup flags into the 2nd argument for the helper call.
434 - This is safe wrt CS limit checking since we're only here for FLAT modes.
435 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
436 interrupt shadow.
437 - The NMI inhibiting is more questionable, though... */
438 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
439 * Should we copy it into fExec to simplify this? OTOH, it's just a
440 * couple of extra instructions if EFLAGS are already in a register. */
441 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
442 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
443
444 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
445 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookup);
446 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithIrq);
447 }
448 }
449 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
450 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlb);
451 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq);
452#endif
453 }
454 return off;
455}
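/*
 * Exit selection above, condensed (same logic as the code):
 *   - next PC on same physical page, IRQ check already emitted  -> ReturnBreakViaLookup
 *   - next PC on same physical page, no recent IRQ check        -> ReturnBreakViaLookupWithIrq
 *   - next PC unknown or on another page, IRQ check emitted     -> ReturnBreakViaLookupWithTlb
 *   - next PC unknown or on another page, no recent IRQ check   -> ReturnBreakViaLookupWithTlbAndIrq
 * where "IRQ check already emitted" means pReNative->idxLastCheckIrqCallNo != UINT32_MAX.
 */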
456
457
458#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
459 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
460 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
461
462#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
463 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
464 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
465 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
466
467/** Same as iemRegAddToRip64AndFinishingNoFlags. */
468DECL_INLINE_THROW(uint32_t)
469iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
470{
471#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
472# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
473 if (!pReNative->Core.offPc)
474 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
475# endif
476
477 /* Allocate a temporary PC register. */
478 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
479
480 /* Perform the addition and store the result. */
481 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
482 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
483
484 /* Free but don't flush the PC register. */
485 iemNativeRegFreeTmp(pReNative, idxPcReg);
486#endif
487
488#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
489 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
490
491 pReNative->Core.offPc += cbInstr;
492# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
493 off = iemNativePcAdjustCheck(pReNative, off);
494# endif
495 if (pReNative->cCondDepth)
496 off = iemNativeEmitPcWriteback(pReNative, off);
497 else
498 pReNative->Core.cInstrPcUpdateSkipped++;
499#endif
500
501 return off;
502}
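/*
 * The two build variants above, sketched for a straight-line 3-byte + 2-byte
 * instruction pair (pseudo code; the actual writeback is done by
 * iemNativeEmitPcWriteback, which is not part of this excerpt):
 *
 *     // without IEMNATIVE_WITH_DELAYED_PC_UPDATING: emitted per instruction
 *     //     pc = cpum.GstCtx.rip;  pc += cbInstr;  cpum.GstCtx.rip = pc;
 *     //
 *     // with IEMNATIVE_WITH_DELAYED_PC_UPDATING: bookkeeping only
 *     //     pReNative->Core.offPc = 3;   // after instruction 1
 *     //     pReNative->Core.offPc = 5;   // after instruction 2
 *     //     a single +5 writeback is emitted at the next point that needs an
 *     //     accurate RIP (conditional block, TB exit, etc.).
 */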
503
504
505#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
506 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
507 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
508
509#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
510 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
511 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
512 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
513
514/** Same as iemRegAddToEip32AndFinishingNoFlags. */
515DECL_INLINE_THROW(uint32_t)
516iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
517{
518#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
519# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
520 if (!pReNative->Core.offPc)
521 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
522# endif
523
524 /* Allocate a temporary PC register. */
525 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
526
527 /* Perform the addition and store the result. */
528 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
529 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
530
531 /* Free but don't flush the PC register. */
532 iemNativeRegFreeTmp(pReNative, idxPcReg);
533#endif
534
535#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
536 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
537
538 pReNative->Core.offPc += cbInstr;
539# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
540 off = iemNativePcAdjustCheck(pReNative, off);
541# endif
542 if (pReNative->cCondDepth)
543 off = iemNativeEmitPcWriteback(pReNative, off);
544 else
545 pReNative->Core.cInstrPcUpdateSkipped++;
546#endif
547
548 return off;
549}
550
551
552#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
553 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
554 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
555
556#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
557 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
558 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
559 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
560
561/** Same as iemRegAddToIp16AndFinishingNoFlags. */
562DECL_INLINE_THROW(uint32_t)
563iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
564{
565#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
566# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
567 if (!pReNative->Core.offPc)
568 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
569# endif
570
571 /* Allocate a temporary PC register. */
572 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
573
574 /* Perform the addition and store the result. */
575 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
576 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
577 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
578
579 /* Free but don't flush the PC register. */
580 iemNativeRegFreeTmp(pReNative, idxPcReg);
581#endif
582
583#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
584 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
585
586 pReNative->Core.offPc += cbInstr;
587# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
588 off = iemNativePcAdjustCheck(pReNative, off);
589# endif
590 if (pReNative->cCondDepth)
591 off = iemNativeEmitPcWriteback(pReNative, off);
592 else
593 pReNative->Core.cInstrPcUpdateSkipped++;
594#endif
595
596 return off;
597}
598
599
600
601/*********************************************************************************************************************************
602* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
603*********************************************************************************************************************************/
604
605#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
606 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
607 (a_enmEffOpSize), pCallEntry->idxInstr); \
608 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
609
610#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
611 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
612 (a_enmEffOpSize), pCallEntry->idxInstr); \
613 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
614 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
615
616#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
617 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
618 IEMMODE_16BIT, pCallEntry->idxInstr); \
619 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
620
621#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
622 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
623 IEMMODE_16BIT, pCallEntry->idxInstr); \
624 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
625 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
626
627#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
628 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
629 IEMMODE_64BIT, pCallEntry->idxInstr); \
630 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
631
632#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
633 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
634 IEMMODE_64BIT, pCallEntry->idxInstr); \
635 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
636 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
637
638/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
639 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
640 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
641DECL_INLINE_THROW(uint32_t)
642iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
643 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
644{
645 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
646
647 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
648 off = iemNativeRegFlushPendingWrites(pReNative, off);
649
650#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
651 Assert(pReNative->Core.offPc == 0);
652
653 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
654#endif
655
656 /* Allocate a temporary PC register. */
657 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
658
659 /* Perform the addition. */
660 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
661
662 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
663 {
664 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
665 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
666 }
667 else
668 {
669 /* Just truncate the result to 16-bit IP. */
670 Assert(enmEffOpSize == IEMMODE_16BIT);
671 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
672 }
673 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
674
675 /* Free but don't flush the PC register. */
676 iemNativeRegFreeTmp(pReNative, idxPcReg);
677
678 return off;
679}
680
681
682#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
683 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
684 (a_enmEffOpSize), pCallEntry->idxInstr); \
685 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
686
687#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
688 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
689 (a_enmEffOpSize), pCallEntry->idxInstr); \
690 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
691 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
692
693#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
694 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
695 IEMMODE_16BIT, pCallEntry->idxInstr); \
696 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
697
698#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
699 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
700 IEMMODE_16BIT, pCallEntry->idxInstr); \
701 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
702 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
703
704#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
705 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
706 IEMMODE_32BIT, pCallEntry->idxInstr); \
707 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
708
709#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
710 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
711 IEMMODE_32BIT, pCallEntry->idxInstr); \
712 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
713 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
714
715/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
716 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
717 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
718DECL_INLINE_THROW(uint32_t)
719iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
720 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
721{
722 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
723
724 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
725 off = iemNativeRegFlushPendingWrites(pReNative, off);
726
727#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
728 Assert(pReNative->Core.offPc == 0);
729
730 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
731#endif
732
733 /* Allocate a temporary PC register. */
734 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
735
736 /* Perform the addition. */
737 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
738
739 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
740 if (enmEffOpSize == IEMMODE_16BIT)
741 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
742
743 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
744/** @todo we can skip this in 32-bit FLAT mode. */
745 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
746
747 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
748
749 /* Free but don't flush the PC register. */
750 iemNativeRegFreeTmp(pReNative, idxPcReg);
751
752 return off;
753}
754
755
756#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
757 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
758 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
759
760#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
761 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
762 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
763 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
764
765#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
766 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
767 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
768
769#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
770 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
771 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
772 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
773
774#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
775 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
776 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
777
778#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
779 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
780 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
781 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
782
783/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
784DECL_INLINE_THROW(uint32_t)
785iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
786 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
787{
788 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
789 off = iemNativeRegFlushPendingWrites(pReNative, off);
790
791#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
792 Assert(pReNative->Core.offPc == 0);
793
794 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
795#endif
796
797 /* Allocate a temporary PC register. */
798 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
799
800 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
801 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
802 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
803 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
804 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
805
806 /* Free but don't flush the PC register. */
807 iemNativeRegFreeTmp(pReNative, idxPcReg);
808
809 return off;
810}
811
812
813
814/*********************************************************************************************************************************
815* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
816*********************************************************************************************************************************/
817
818/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
819#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
820 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
821
822/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
823#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
824 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
825
826/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
827#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
828 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
829
830/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
831 * clears flags. */
832#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
833 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
834 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
835
836/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
837 * clears flags. */
838#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
839 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
840 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
841
842/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
843 * clears flags. */
844#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
845 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
846 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
847
848#undef IEM_MC_SET_RIP_U16_AND_FINISH
849
850
851/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
852#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
853 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
854
855/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
856#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
857 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
858
859/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
860 * clears flags. */
861#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
862 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
863 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
864
865/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
866 * and clears flags. */
867#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
868 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
869 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
870
871#undef IEM_MC_SET_RIP_U32_AND_FINISH
872
873
874/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
875#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
876 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
877
878/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
879 * and clears flags. */
880#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
881 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
882 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
883
884#undef IEM_MC_SET_RIP_U64_AND_FINISH
885
886
887/** Same as iemRegRipJumpU16AndFinishNoFlags,
888 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
889DECL_INLINE_THROW(uint32_t)
890iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
891 uint8_t idxInstr, uint8_t cbVar)
892{
893 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
894 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
895
896 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
897 off = iemNativeRegFlushPendingWrites(pReNative, off);
898
899#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
900 Assert(pReNative->Core.offPc == 0);
901
902 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
903#endif
904
905 /* Get a register with the new PC loaded from idxVarPc.
906 Note! This ASSUMES that the high bits of the GPR are zeroed. */
907 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
908
909 /* Check limit (may #GP(0) + exit TB). */
910 if (!f64Bit)
911/** @todo we can skip this test in FLAT 32-bit mode. */
912 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
913 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
914 else if (cbVar > sizeof(uint32_t))
915 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
916
917 /* Store the result. */
918 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
919
920 iemNativeVarRegisterRelease(pReNative, idxVarPc);
921 /** @todo implicitly free the variable? */
922
923 return off;
924}
925
926
927
928/*********************************************************************************************************************************
929* Emitters for changing PC/RIP/EIP/IP with a relative call jump (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters). *
930*********************************************************************************************************************************/
931
932/** @todo These helpers naturally belong to the stack push API, but we already need them up here (we could of course move
933 * this below the stack emitters, but then it would not be close to the rest of the PC/RIP handling...). */
934DECL_FORCE_INLINE_THROW(uint32_t)
935iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
936{
937 /* Use16BitSp: */
938#ifdef RT_ARCH_AMD64
939 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
940 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
941#else
942 /* sub regeff, regrsp, #cbMem */
943 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
944 /* and regeff, regeff, #0xffff */
945 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
946 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
947 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0. */
948 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
949#endif
950 return off;
951}
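/*
 * Worked example for the 16-bit stack case (values made up): with SP=0x0002
 * and cbMem=4 the subtraction wraps to 0xFFFE, so idxRegEffSp ends up holding
 * 0xFFFE for the address calculation while only bits 15:0 of idxRegRsp are
 * updated and the upper bits of RSP are left untouched, which is what the
 * 16-bit sub / BFI sequences above implement.
 */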
952
953
954DECL_FORCE_INLINE(uint32_t)
955iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
956{
957 /* Use32BitSp: */
958 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
959 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
960 return off;
961}
962
963
964DECL_INLINE_THROW(uint32_t)
965iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
966 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
967{
968 /*
969 * Assert sanity.
970 */
971#ifdef VBOX_STRICT
972 if (RT_BYTE2(cBitsVarAndFlat) != 0)
973 {
974 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
975 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
976 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
977 Assert( pfnFunction
978 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
979 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
980 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
981 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
982 : UINT64_C(0xc000b000a0009000) ));
983 }
984 else
985 Assert( pfnFunction
986 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
987 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
988 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
989 : UINT64_C(0xc000b000a0009000) ));
990#endif
991
992#ifdef VBOX_STRICT
993 /*
994 * Check that the fExec flags we've got make sense.
995 */
996 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
997#endif
998
999 /*
1000 * To keep things simple we have to commit any pending writes first as we
1001 * may end up making calls.
1002 */
1003 /** @todo we could postpone this till we make the call and reload the
1004 * registers after returning from the call. Not sure if that's sensible or
1005 * not, though. */
1006 off = iemNativeRegFlushPendingWrites(pReNative, off);
1007
1008 /*
1009 * First we calculate the new RSP and the effective stack pointer value.
1010 * For 64-bit mode and flat 32-bit these two are the same.
1011 * (Code structure is very similar to that of PUSH)
1012 */
1013 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
1014 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
1015 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
1016 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
1017 ? cbMem : sizeof(uint16_t);
1018 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
1019 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1020 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1021 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1022 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1023 if (cBitsFlat != 0)
1024 {
1025 Assert(idxRegEffSp == idxRegRsp);
1026 Assert(cBitsFlat == 32 || cBitsFlat == 64);
1027 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1028 if (cBitsFlat == 64)
1029 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1030 else
1031 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1032 }
1033 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1034 {
1035 Assert(idxRegEffSp != idxRegRsp);
1036 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1037 kIemNativeGstRegUse_ReadOnly);
1038#ifdef RT_ARCH_AMD64
1039 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1040#else
1041 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1042#endif
1043 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1044 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1045 offFixupJumpToUseOtherBitSp = off;
1046 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1047 {
1048 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1049 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1050 }
1051 else
1052 {
1053 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1054 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1055 }
1056 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1057 }
1058 /* SpUpdateEnd: */
1059 uint32_t const offLabelSpUpdateEnd = off;
1060
1061 /*
1062 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1063 * we're skipping lookup).
1064 */
1065 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1066 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
1067 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1068 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1069 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1070 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1071 : UINT32_MAX;
1072 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1073
1074
1075 if (!TlbState.fSkip)
1076 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1077 else
1078 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1079
1080 /*
1081 * Use16BitSp:
1082 */
1083 if (cBitsFlat == 0)
1084 {
1085#ifdef RT_ARCH_AMD64
1086 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1087#else
1088 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1089#endif
1090 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1091 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1092 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1093 else
1094 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1095 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1096 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1097 }
1098
1099 /*
1100 * TlbMiss:
1101 *
1102 * Call helper to do the pushing.
1103 */
1104 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1105
1106#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1107 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1108#else
1109 RT_NOREF(idxInstr);
1110#endif
1111
1112 /* Save variables in volatile registers. */
1113 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1114 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1115 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1116 | (RT_BIT_32(idxRegPc));
1117 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1118
1119 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1120 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1121 {
1122 /* Swap them using ARG0 as temp register: */
1123 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1124 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1125 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1126 }
1127 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1128 {
1129 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1130 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1131
1132 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1133 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1134 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1135 }
1136 else
1137 {
1138 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1139 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1140
1141 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1142 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1143 }
1144
1145 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1146 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1147
1148 /* Done setting up parameters, make the call. */
1149 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1150
1151 /* Restore variables and guest shadow registers to volatile registers. */
1152 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1153 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1154
1155#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1156 if (!TlbState.fSkip)
1157 {
1158 /* end of TlbMiss - Jump to the done label. */
1159 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1160 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1161
1162 /*
1163 * TlbLookup:
1164 */
1165 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
1166 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1167
1168 /*
1169 * Emit code to do the actual storing / fetching.
1170 */
1171 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1172# ifdef IEM_WITH_TLB_STATISTICS
1173 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1174 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1175# endif
1176 switch (cbMemAccess)
1177 {
1178 case 2:
1179 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1180 break;
1181 case 4:
1182 if (!fIsIntelSeg)
1183 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1184 else
1185 {
1186 /* intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
1187 PUSH FS in real mode, so we have to try to emulate that here.
1188 We borrow the now unused idxReg1 from the TLB lookup code here. */
1189 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
1190 kIemNativeGstReg_EFlags);
1191 if (idxRegEfl != UINT8_MAX)
1192 {
1193#ifdef RT_ARCH_AMD64
1194 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
1195 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1196 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1197#else
1198 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
1199 off, TlbState.idxReg1, idxRegEfl,
1200 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1201#endif
1202 iemNativeRegFreeTmp(pReNative, idxRegEfl);
1203 }
1204 else
1205 {
1206 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
1207 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
1208 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1209 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1210 }
1211 /* ASSUMES the upper half of idxRegPc is ZERO. */
1212 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
1213 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
1214 }
1215 break;
1216 case 8:
1217 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1218 break;
1219 default:
1220 AssertFailed();
1221 }
1222
1223 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1224 TlbState.freeRegsAndReleaseVars(pReNative);
1225
1226 /*
1227 * TlbDone:
1228 *
1229 * Commit the new RSP value.
1230 */
1231 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1232 }
1233#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1234
1235#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1236 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1237#endif
1238 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1239 if (idxRegEffSp != idxRegRsp)
1240 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1241
1242 return off;
1243}
1244
1245
1246/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1247#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1248 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1249
1250/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1251 * clears flags. */
1252#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1253 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1254 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1255
1256/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1257#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1258 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1259
1260/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1261 * clears flags. */
1262#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1263 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1264 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1265
1266#undef IEM_MC_IND_CALL_U16_AND_FINISH
1267
1268
1269/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1270#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1271 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1272
1273/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1274 * clears flags. */
1275#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1276 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1277 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1278
1279#undef IEM_MC_IND_CALL_U32_AND_FINISH
1280
1281
1282/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1283 * an extra parameter, for use in 64-bit code. */
1284#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1285 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1286
1287
1288/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1289 * an extra parameter, for use in 64-bit code and we need to check and clear
1290 * flags. */
1291#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1292 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1293 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1294
1295#undef IEM_MC_IND_CALL_U64_AND_FINISH
1296
1297/** Common worker for the IEM_MC_IND_CALL_U16/U32/U64_AND_FINISH_THREADED_PCxx
1298 * variants above (indirect call with the new IP/EIP/RIP in a variable). */
1299DECL_INLINE_THROW(uint32_t)
1300iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1301 uint8_t idxInstr, uint8_t cbVar)
1302{
1303 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1304 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1305
1306 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1307 off = iemNativeRegFlushPendingWrites(pReNative, off);
1308
1309#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1310 Assert(pReNative->Core.offPc == 0);
1311
1312 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1313#endif
1314
1315 /* Get a register with the new PC loaded from idxVarPc.
1316 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1317 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1318
1319 /* Check limit (may #GP(0) + exit TB). */
1320 if (!f64Bit)
1321/** @todo we can skip this test in FLAT 32-bit mode. */
1322 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1323 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1324 else if (cbVar > sizeof(uint32_t))
1325 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1326
1327#if 1
1328 /* Allocate a temporary PC register, we don't want it shadowed. */
1329 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1330 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1331#else
1332 /* Allocate a temporary PC register. */
1333 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1334 true /*fNoVolatileRegs*/);
1335#endif
1336
1337 /* Perform the addition and push the variable to the guest stack. */
1338 /** @todo Flat variants for PC32 variants. */
1339 switch (cbVar)
1340 {
1341 case sizeof(uint16_t):
1342 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1343 /* Truncate the result to 16-bit IP. */
1344 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1345 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1346 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1347 break;
1348 case sizeof(uint32_t):
1349 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1350 /** @todo In FLAT mode we can use the flat variant. */
1351 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1352 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1353 break;
1354 case sizeof(uint64_t):
1355 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1356 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1357 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1358 break;
1359 default:
1360 AssertFailed();
1361 }
1362
1363 /* RSP got changed, so do this again. */
1364 off = iemNativeRegFlushPendingWrites(pReNative, off);
1365
1366 /* Store the result. */
1367 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1368
1369#if 1
1370 /* Need to transfer the shadow information to the new RIP register. */
1371 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1372#else
1373 /* Sync the new PC. */
1374 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1375#endif
1376 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1377 iemNativeRegFreeTmp(pReNative, idxPcReg);
1378 /** @todo implicitly free the variable? */
1379
1380 return off;
1381}
1382
1383
1384/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1385 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1386#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1387 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1388
1389/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1390 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1391 * flags. */
1392#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1393 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1394 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1395
1396/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1397 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1398#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1399 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1400
1401/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1402 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1403 * flags. */
1404#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1405 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1406 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1407
1408/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1409 * an extra parameter, for use in 64-bit code. */
1410#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
1411 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1412
1413/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1414 * an extra parameter, for use in 64-bit code and we need to check and clear
1415 * flags. */
1416#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
1417 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
1418 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1419
1420#undef IEM_MC_REL_CALL_S16_AND_FINISH
1421
1422/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1423 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1424DECL_INLINE_THROW(uint32_t)
1425iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
1426 uint8_t idxInstr)
1427{
1428 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1429 off = iemNativeRegFlushPendingWrites(pReNative, off);
1430
1431#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1432 Assert(pReNative->Core.offPc == 0);
1433
1434 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1435#endif
1436
1437 /* Allocate a temporary PC register. */
1438 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1439 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1440 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
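    /* Register roles: idxPcRegOld will end up holding the 16-bit return address (IP + cbInstr),
       idxPcRegNew the call target (return address + offDisp, truncated to 16 bits). */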
1441
1442 /* Calculate the new RIP. */
1443 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1444 /* Truncate the result to 16-bit IP. */
1445 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
1446 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1447 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1448
1449 /* Truncate the result to 16-bit IP. */
1450 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
1451
1452 /* Check limit (may #GP(0) + exit TB). */
1453 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1454
1455 /* Push the return address (idxPcRegOld) onto the guest stack. */
1456 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1457 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1458
1459 /* RSP got changed, so flush again. */
1460 off = iemNativeRegFlushPendingWrites(pReNative, off);
1461
1462 /* Store the result. */
1463 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1464
1465 /* Need to transfer the shadow information to the new RIP register. */
1466 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1467 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1468 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1469
1470 return off;
1471}
1472
1473
1474/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1475 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1476#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
1477 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
1478
1479/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1480 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1481 * flags. */
1482#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
1483 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
1484 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1485
1486#undef IEM_MC_REL_CALL_S32_AND_FINISH
1487
1488/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1489 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1490DECL_INLINE_THROW(uint32_t)
1491iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
1492 uint8_t idxInstr)
1493{
1494 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1495 off = iemNativeRegFlushPendingWrites(pReNative, off);
1496
1497#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1498 Assert(pReNative->Core.offPc == 0);
1499
1500 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1501#endif
1502
1503 /* Allocate a temporary PC register. */
1504 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1505 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1506 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1507
1508 /* Update the EIP to get the return address. */
1509 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1510
1511 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1512 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1513 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1514 /** @todo we can skip this test in FLAT 32-bit mode. */
1515 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1516
1517 /* Push the return address onto the guest stack. */
1518 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
1519 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1520 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1521
1522 /* RSP got changed, so do this again. */
1523 off = iemNativeRegFlushPendingWrites(pReNative, off);
1524
1525 /* Store the result. */
1526 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1527
1528 /* Need to transfer the shadow information to the new RIP register. */
1529 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1530 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1531 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1532
1533 return off;
1534}
1535
1536
1537/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1538 * an extra parameter, for use in 64-bit code. */
1539#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
1540 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
1541
1542/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1543 * an extra parameter, for use in 64-bit code and we need to check and clear
1544 * flags. */
1545#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
1546 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
1547 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1548
1549#undef IEM_MC_REL_CALL_S64_AND_FINISH
1550
1551/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1552 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1553DECL_INLINE_THROW(uint32_t)
1554iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
1555 uint8_t idxInstr)
1556{
1557 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1558 off = iemNativeRegFlushPendingWrites(pReNative, off);
1559
1560#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1561 Assert(pReNative->Core.offPc == 0);
1562
1563 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1564#endif
1565
1566 /* Allocate a temporary PC register. */
1567 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1568 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1569 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1570
1571 /* Update the RIP to get the return address. */
1572 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
1573
1574 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1575 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
1576 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
1577 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1578
1579 /* Push the return address onto the guest stack. */
1580 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1581 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1582
1583 /* RSP got changed, so do this again. */
1584 off = iemNativeRegFlushPendingWrites(pReNative, off);
1585
1586 /* Store the result. */
1587 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1588
1589 /* Need to transfer the shadow information to the new RIP register. */
1590 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1591 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1592 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1593
1594 return off;
1595}
1596
1597
1598/*********************************************************************************************************************************
1599* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters). *
1600*********************************************************************************************************************************/
1601
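/** Emits the 16-bit stack pointer variant of the RETN stack pop: copies SP into
 * the effective stack pointer register and advances SP by cbMem + cbPopAdd,
 * leaving the upper bits of RSP untouched (ARM64 uses idxRegTmp for the
 * wrapped 16-bit sum). */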
1602DECL_FORCE_INLINE_THROW(uint32_t)
1603iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1604 uint16_t cbPopAdd, uint8_t idxRegTmp)
1605{
1606 /* Use16BitSp: */
1607#ifdef RT_ARCH_AMD64
1608 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1609 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1610 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
1611 RT_NOREF(idxRegTmp);
1612#elif defined(RT_ARCH_ARM64)
1613 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
1614 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
1615 /* add tmp, regrsp, #cbMem */
1616 uint16_t const cbCombined = cbMem + cbPopAdd;
1617 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
1618 if (cbCombined >= RT_BIT_32(12))
1619 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
1620 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
1621 /* and tmp, tmp, #0xffff */
1622 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1623 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
1624 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
1625 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
1626#else
1627# error "Port me"
1628#endif
1629 return off;
1630}
1631
1632
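/** Emits the 32-bit stack pointer variant of the RETN stack pop: copies ESP into
 * the effective stack pointer register and adds cbMem + cbPopAdd to ESP. */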
1633DECL_FORCE_INLINE_THROW(uint32_t)
1634iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1635 uint16_t cbPopAdd)
1636{
1637 /* Use32BitSp: */
1638 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1639 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
1640 return off;
1641}
1642
1643
1644/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
1645#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
1646 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
1647
1648/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
1649#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1650 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1651
1652/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
1653#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1654 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1655
1656/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
1657 * clears flags. */
1658#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
1659 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
1660 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1661
1662/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
1663 * clears flags. */
1664#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1665 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1666 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1667
1668/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
1669 * clears flags. */
1670#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1671 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1672 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1673
1674/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
1675DECL_INLINE_THROW(uint32_t)
1676iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
1677 IEMMODE enmEffOpSize, uint8_t idxInstr)
1678{
1679 RT_NOREF(cbInstr);
1680
1681#ifdef VBOX_STRICT
1682 /*
1683 * Check that the fExec flags we've got make sense.
1684 */
1685 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1686#endif
1687
1688 /*
1689 * To keep things simple we have to commit any pending writes first as we
1690 * may end up making calls.
1691 */
1692 off = iemNativeRegFlushPendingWrites(pReNative, off);
1693
1694 /*
1695 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
1696 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
1697 * directly as the effective stack pointer.
1698 * (Code structure is very similar to that of PUSH)
1699 *
1700 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
1701 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
1702 * aren't commonly used (or useful) and thus not in need of optimizing.
1703 *
1704 * Note! For non-flat modes the guest RSP is not allocated for update but rather for calculation
1705 * as the shadowed register would remain modified even if the return address throws a \#GP(0)
1706 * due to being outside the CS limit causing a wrong stack pointer value in the guest (see
1707 * the near return testcase in bs3-cpu-basic-2). If no exception is thrown the shadowing is transferred
1708 * to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
1709 */
1710 uint8_t const cbMem = enmEffOpSize == IEMMODE_64BIT
1711 ? sizeof(uint64_t)
1712 : enmEffOpSize == IEMMODE_32BIT
1713 ? sizeof(uint32_t)
1714 : sizeof(uint16_t);
1715 bool const fFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
1716 uintptr_t const pfnFunction = fFlat
1717 ? enmEffOpSize == IEMMODE_64BIT
1718 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
1719 : (uintptr_t)iemNativeHlpStackFlatFetchU32
1720 : enmEffOpSize == IEMMODE_32BIT
1721 ? (uintptr_t)iemNativeHlpStackFetchU32
1722 : (uintptr_t)iemNativeHlpStackFetchU16;
1723 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1724 fFlat ? kIemNativeGstRegUse_ForUpdate
1725 : kIemNativeGstRegUse_Calculation,
1726 true /*fNoVolatileRegs*/);
1727 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1728 /** @todo can do a better job picking the register here. For cbMem >= 4 this
1729 * will be the resulting register value. */
1730 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
1731
1732 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1733 if (fFlat)
1734 Assert(idxRegEffSp == idxRegRsp);
1735 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1736 {
1737 Assert(idxRegEffSp != idxRegRsp);
1738 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1739 kIemNativeGstRegUse_ReadOnly);
1740#ifdef RT_ARCH_AMD64
1741 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1742#else
1743 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1744#endif
1745 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1746 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1747 offFixupJumpToUseOtherBitSp = off;
1748 if (enmEffOpSize == IEMMODE_32BIT)
1749 {
1750 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1751 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1752 }
1753 else
1754 {
1755 Assert(enmEffOpSize == IEMMODE_16BIT);
1756 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1757 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1758 idxRegMemResult);
1759 }
1760 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1761 }
1762 /* SpUpdateEnd: */
1763 uint32_t const offLabelSpUpdateEnd = off;
1764
1765 /*
1766 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1767 * we're skipping lookup).
1768 */
1769 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
1770 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
1771 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1772 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1773 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1774 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1775 : UINT32_MAX;
1776
1777 if (!TlbState.fSkip)
1778 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1779 else
1780 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1781
1782 /*
1783 * Use16BitSp:
1784 */
1785 if (!fFlat)
1786 {
1787#ifdef RT_ARCH_AMD64
1788 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1789#else
1790 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1791#endif
1792 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1793 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1794 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1795 idxRegMemResult);
1796 else
1797 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1798 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1799 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1800 }
1801
1802 /*
1803 * TlbMiss:
1804 *
1805 * Call helper to do the popping (stack fetch).
1806 */
1807 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1808
1809#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1810 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1811#else
1812 RT_NOREF(idxInstr);
1813#endif
1814
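    /* Save variables in volatile registers. */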
1815 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1816 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1817 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
1818 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1819
1820
1821 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
1822 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1823 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1824
1825 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1826 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1827
1828 /* Done setting up parameters, make the call. */
1829 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1830
1831 /* Move the return register content to idxRegMemResult. */
1832 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
1833 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
1834
1835 /* Restore variables and guest shadow registers to volatile registers. */
1836 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1837 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1838
1839#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1840 if (!TlbState.fSkip)
1841 {
1842 /* end of TlbMiss - Jump to the done label. */
1843 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1844 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1845
1846 /*
1847 * TlbLookup:
1848 */
1849 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
1850 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1851
1852 /*
1853 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
1854 */
1855 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1856# ifdef IEM_WITH_TLB_STATISTICS
1857 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1858 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1859# endif
1860 switch (cbMem)
1861 {
1862 case 2:
1863 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1864 break;
1865 case 4:
1866 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1867 break;
1868 case 8:
1869 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1870 break;
1871 default:
1872 AssertFailed();
1873 }
1874
1875 TlbState.freeRegsAndReleaseVars(pReNative);
1876
1877 /*
1878 * TlbDone:
1879 *
1880 * Set the new RSP value (FLAT accesses need to calculate it first) and
1881 * commit the popped register value.
1882 */
1883 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1884 }
1885#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1886
1887 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
1888 if (!f64Bit)
1889/** @todo we can skip this test in FLAT 32-bit mode. */
1890 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1891 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1892 else if (enmEffOpSize == IEMMODE_64BIT)
1893 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1894
1895 /* Complete RSP calculation for FLAT mode. */
1896 if (idxRegEffSp == idxRegRsp)
1897 {
1898 if (enmEffOpSize == IEMMODE_64BIT)
1899 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
1900 else
1901 {
1902 Assert(enmEffOpSize == IEMMODE_32BIT);
1903 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
1904 }
1905 }
1906
1907 /* Commit the result and clear any current guest shadows for RIP. */
1908 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
1909 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1910 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
1911
1912 /* Need to transfer the shadowing information to the host register containing the updated value now. */
1913 if (!fFlat)
1914 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
1915
1916 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1917 if (idxRegEffSp != idxRegRsp)
1918 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1919 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1920 return off;
1921}
1922
1923
1924/*********************************************************************************************************************************
1925* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
1926*********************************************************************************************************************************/
1927
1928#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
1929 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
1930
1931/**
1932 * Emits code to check if a \#NM exception should be raised.
1933 *
1934 * @returns New code buffer offset, UINT32_MAX on failure.
1935 * @param pReNative The native recompile state.
1936 * @param off The code buffer offset.
1937 * @param idxInstr The current instruction.
1938 */
1939DECL_INLINE_THROW(uint32_t)
1940iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1941{
1942#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1943 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
1944
1945 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
1946 {
1947#endif
1948 /*
1949 * Make sure we don't have any outstanding guest register writes as we may
1950 * raise an #NM and all guest registers must be up to date in CPUMCTX.
1951 */
1952 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
1953 off = iemNativeRegFlushPendingWrites(pReNative, off);
1954
1955#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1956 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1957#else
1958 RT_NOREF(idxInstr);
1959#endif
1960
1961 /* Allocate a temporary CR0 register. */
1962 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
1963 kIemNativeGstRegUse_ReadOnly);
1964
1965 /*
1966 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
1967 * return raisexcpt();
1968 */
1969 /* Test and jump. */
1970 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS,
1971 kIemNativeLabelType_RaiseNm);
1972
1973 /* Free but don't flush the CR0 register. */
1974 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1975
1976#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1977 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
1978 }
1979 else
1980 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
1981#endif
1982
1983 return off;
1984}
1985
1986
1987#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
1988 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
1989
1990/**
1991 * Emits code to check if a \#NM exception should be raised.
1992 *
1993 * @returns New code buffer offset, UINT32_MAX on failure.
1994 * @param pReNative The native recompile state.
1995 * @param off The code buffer offset.
1996 * @param idxInstr The current instruction.
1997 */
1998DECL_INLINE_THROW(uint32_t)
1999iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2000{
2001#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2002 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2003
2004 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2005 {
2006#endif
2007 /*
2008 * Make sure we don't have any outstanding guest register writes as we may
2009 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2010 */
2011 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2012 off = iemNativeRegFlushPendingWrites(pReNative, off);
2013
2014#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2015 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2016#else
2017 RT_NOREF(idxInstr);
2018#endif
2019
2020 /* Allocate a temporary CR0 register. */
2021 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2022 kIemNativeGstRegUse_Calculation);
2023
2024 /*
2025 * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
2026 * return raisexcpt();
2027 */
2028 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2029 /* Test and jump. */
2030 off = iemNativeEmitTestIfGpr32EqualsImmAndTbExit(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS,
2031 kIemNativeLabelType_RaiseNm);
2032
2033 /* Free the CR0 register. */
2034 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2035
2036#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2037 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2038 }
2039 else
2040 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2041#endif
2042
2043 return off;
2044}
2045
2046
2047#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2048 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2049
2050/**
2051 * Emits code to check if a \#MF exception should be raised.
2052 *
2053 * @returns New code buffer offset, UINT32_MAX on failure.
2054 * @param pReNative The native recompile state.
2055 * @param off The code buffer offset.
2056 * @param idxInstr The current instruction.
2057 */
2058DECL_INLINE_THROW(uint32_t)
2059iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2060{
2061 /*
2062 * Make sure we don't have any outstanding guest register writes as we may
2063 * raise an #MF and all guest registers must be up to date in CPUMCTX.
2064 */
2065 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2066 off = iemNativeRegFlushPendingWrites(pReNative, off);
2067
2068#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2069 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2070#else
2071 RT_NOREF(idxInstr);
2072#endif
2073
2074 /* Allocate a temporary FSW register. */
2075 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2076 kIemNativeGstRegUse_ReadOnly);
2077
2078 /*
2079 * if ((FSW & X86_FSW_ES) != 0)
2080 * return raisexcpt();
2081 */
2082 /* Test and jump. */
2083 off = iemNativeEmitTestBitInGprAndTbExitIfSet(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT, kIemNativeLabelType_RaiseMf);
2084
2085 /* Free but don't flush the FSW register. */
2086 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2087
2088 return off;
2089}
2090
2091
2092#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2093 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2094
2095/**
2096 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2097 *
2098 * @returns New code buffer offset, UINT32_MAX on failure.
2099 * @param pReNative The native recompile state.
2100 * @param off The code buffer offset.
2101 * @param idxInstr The current instruction.
2102 */
2103DECL_INLINE_THROW(uint32_t)
2104iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2105{
2106#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2107 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2108
2109 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2110 {
2111#endif
2112 /*
2113 * Make sure we don't have any outstanding guest register writes as we may
2114 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2115 */
2116 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2117 off = iemNativeRegFlushPendingWrites(pReNative, off);
2118
2119#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2120 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2121#else
2122 RT_NOREF(idxInstr);
2123#endif
2124
2125 /* Allocate a temporary CR0 and CR4 register. */
2126 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2127 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2128 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2129
2130 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2131#ifdef RT_ARCH_AMD64
2132 /*
2133 * We do a modified test here:
2134 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
2135 * else { goto RaiseSseRelated; }
2136 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2137 * all targets except the 386, which doesn't support SSE, so this should
2138 * be a safe assumption.
2139 */
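    /* Worked example (illustration only): with CR4.OSFXSR=1, CR0.EM=0 and CR0.TS=0
       the expression is (OSFXSR | 0) ^ OSFXSR = 0, so we take the likely path; if
       EM or TS is set, or OSFXSR is clear, the result is non-zero and we exit to
       RaiseSseRelated. */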
2140 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
2141 //pCodeBuf[off++] = 0xcc;
2142 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2143 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2144 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2145 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2146 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2147 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseSseRelated, kIemNativeInstrCond_ne);
2148
2149#elif defined(RT_ARCH_ARM64)
2150 /*
2151 * We do a modified test here:
2152 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2153 * else { goto RaiseSseRelated; }
2154 */
2155 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
2156 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2157 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2158 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2159 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2160 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2161 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2162 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2163 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2164 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2165 kIemNativeLabelType_RaiseSseRelated);
2166
2167#else
2168# error "Port me!"
2169#endif
2170
2171 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2172 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2173 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2174 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2175
2176#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2177 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2178 }
2179 else
2180 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2181#endif
2182
2183 return off;
2184}
2185
2186
2187#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2188 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2189
2190/**
2191 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2192 *
2193 * @returns New code buffer offset, UINT32_MAX on failure.
2194 * @param pReNative The native recompile state.
2195 * @param off The code buffer offset.
2196 * @param idxInstr The current instruction.
2197 */
2198DECL_INLINE_THROW(uint32_t)
2199iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2200{
2201#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2202 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2203
2204 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2205 {
2206#endif
2207 /*
2208 * Make sure we don't have any outstanding guest register writes as we may
2209 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2210 */
2211 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2212 off = iemNativeRegFlushPendingWrites(pReNative, off);
2213
2214#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2215 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2216#else
2217 RT_NOREF(idxInstr);
2218#endif
2219
2220 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2221 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2222 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2223 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2224 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2225
2226 /*
2227 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2228 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2229 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2230 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2231 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2232 * { likely }
2233 * else { goto RaiseAvxRelated; }
2234 */
2235#ifdef RT_ARCH_AMD64
2236 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2237 | (((cr4 >> X86_CR4_OSXSAVE_BIT) & 1) << 1)
2238 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2239 ^ 0x1a) ) { likely }
2240 else { goto RaiseAvxRelated; } */
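    /* Worked example (illustration only): XCR0.SSE=1, XCR0.YMM=1, CR4.OSXSAVE=1 and
       CR0.TS=0 gives (0x6 << 2) | (1 << 1) | 0 = 0x1a, so the XOR yields zero and we
       take the likely path; any other combination exits to RaiseAvxRelated. */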
2241 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
2242 //pCodeBuf[off++] = 0xcc;
2243 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2244 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2245 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2246 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2247 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2248 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2249 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2250 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2251 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2252 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2253 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseAvxRelated, kIemNativeInstrCond_ne);
2254
2255#elif defined(RT_ARCH_ARM64)
2256 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSXSAVE_BIT) & 1)) ^ 7) << 1)
2257 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2258 else { goto RaiseAvxRelated; } */
2259 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
2260 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2261 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2262 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2263 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2264 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2265 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2266 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2267 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2268 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2269 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2270 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2271 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2272 kIemNativeLabelType_RaiseAvxRelated);
2273
2274#else
2275# error "Port me!"
2276#endif
2277
2278 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2279 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2280 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2281 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2282#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2283 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2284 }
2285 else
2286 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2287#endif
2288
2289 return off;
2290}
2291
2292
2293#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2294#define IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
2295 off = iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(pReNative, off, pCallEntry->idxInstr)
2296
2297/** Emits code for IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT. */
2298DECL_INLINE_THROW(uint32_t)
2299iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2300{
2301 /*
2302 * Make sure we don't have any outstanding guest register writes as we may
2303 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
2304 */
2305 off = iemNativeRegFlushPendingWrites(pReNative, off);
2306
2307#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2308 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2309#else
2310 RT_NOREF(idxInstr);
2311#endif
2312
2313 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
2314 kIemNativeGstRegUse_ReadOnly);
2315 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
2316
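    /* Check for pending unmasked SIMD FP exceptions: shift the exception mask bits
       down over the exception flag bits, invert them and AND with MXCSR; any bit
       remaining in X86_MXCSR_XCPT_FLAGS means an unmasked exception is pending. */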
2317 /* mov tmp, varmxcsr */
2318 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
2319 /* tmp &= X86_MXCSR_XCPT_MASK */
2320 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
2321 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
2322 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
2323 /* tmp = ~tmp */
2324 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
2325 /* tmp &= mxcsr */
2326 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
2327 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
2328 kIemNativeLabelType_RaiseSseAvxFpRelated);
2329
2330 /* Free but don't flush the MXCSR register. */
2331 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
2332 iemNativeRegFreeTmp(pReNative, idxRegTmp);
2333
2334 return off;
2335}
2336#endif
2337
2338
2339#define IEM_MC_RAISE_DIVIDE_ERROR() \
2340 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
2341
2342/**
2343 * Emits code to raise a \#DE.
2344 *
2345 * @returns New code buffer offset, UINT32_MAX on failure.
2346 * @param pReNative The native recompile state.
2347 * @param off The code buffer offset.
2348 * @param idxInstr The current instruction.
2349 */
2350DECL_INLINE_THROW(uint32_t)
2351iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2352{
2353 /*
2354 * Make sure we don't have any outstanding guest register writes as we may raise a #DE and all guest registers must be up to date in CPUMCTX.
2355 */
2356 off = iemNativeRegFlushPendingWrites(pReNative, off);
2357
2358#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2359 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2360#else
2361 RT_NOREF(idxInstr);
2362#endif
2363
2364 /* raise \#DE exception unconditionally. */
2365 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseDe);
2366}
2367
2368
2369#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2370 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2371
2372/**
2373 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2374 *
2375 * @returns New code buffer offset, UINT32_MAX on failure.
2376 * @param pReNative The native recompile state.
2377 * @param off The code buffer offset.
2378 * @param idxInstr The current instruction.
2379 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2380 * @param cbAlign The alignment in bytes to check against.
2381 */
2382DECL_INLINE_THROW(uint32_t)
2383iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2384 uint8_t idxVarEffAddr, uint8_t cbAlign)
2385{
2386 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2387 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2388
2389 /*
2390 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2391 */
2392 off = iemNativeRegFlushPendingWrites(pReNative, off);
2393
2394#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2395 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2396#else
2397 RT_NOREF(idxInstr);
2398#endif
2399
2400 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2401
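 /* An address is aligned on cbAlign when (EffAddr & (cbAlign - 1)) == 0, so the test
    below exits to the RaiseGp0 label when any of those low bits are set. Note that
    this mask trick only works for power-of-two cbAlign values. */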
2402 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxVarReg, cbAlign - 1,
2403 kIemNativeLabelType_RaiseGp0);
2404
2405 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2406 return off;
2407}
2408
2409
2410/*********************************************************************************************************************************
2411* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2412*********************************************************************************************************************************/
2413
2414/**
2415 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2416 *
2417 * @returns Pointer to the condition stack entry on success; throws
2418 * VERR_IEM_COND_TOO_DEEPLY_NESTED via longjmp on failure (too deeply nested).
2419 */
2420DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
2421{
2422#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2423 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
2424#endif
2425
2426 uint32_t const idxStack = pReNative->cCondDepth;
2427 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
2428
2429 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
2430 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
2431
2432 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
2433 pEntry->fInElse = false;
2434 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
2435 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
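 /* Both labels are created undefined (offWhere == UINT32_MAX); their actual positions
    are filled in later by iemNativeEmitElse() / iemNativeEmitEndIf() via
    iemNativeLabelDefine(). */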
2436
2437 return pEntry;
2438}
2439
2440
2441/**
2442 * Start of the if-block, snapshotting the register and variable state.
2443 */
2444DECL_INLINE_THROW(void)
2445iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
2446{
2447 Assert(offIfBlock != UINT32_MAX);
2448 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2449 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2450 Assert(!pEntry->fInElse);
2451
2452 /* Define the start of the IF block if requested or for disassembly purposes. */
2453 if (idxLabelIf != UINT32_MAX)
2454 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
2455#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2456 else
2457 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
2458#else
2459 RT_NOREF(offIfBlock);
2460#endif
2461
2462#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2463 Assert(pReNative->Core.offPc == 0);
2464#endif
2465
2466 /* Copy the initial state so we can restore it in the 'else' block. */
2467 pEntry->InitialState = pReNative->Core;
2468}
2469
2470
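/* For reference, a recompiled conditional roughly expands to (sketch):
 *     off = iemNativeEmitIfXxx(pReNative, off, ...); do {
 *         ... if-block emitters ...
 *     } while (0); off = iemNativeEmitElse(pReNative, off); do {
 *         ... else-block emitters ...
 *     } while (0); off = iemNativeEmitEndIf(pReNative, off);
 * The do/while pairs merely give each block a C scope; both blocks' emitters always
 * run at recompile time, and the run-time branching is done via the Else/Endif
 * labels managed on the condition stack. */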
2471#define IEM_MC_ELSE() } while (0); \
2472 off = iemNativeEmitElse(pReNative, off); \
2473 do {
2474
2475/** Emits code related to IEM_MC_ELSE. */
2476DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2477{
2478 /* Check sanity and get the conditional stack entry. */
2479 Assert(off != UINT32_MAX);
2480 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2481 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2482 Assert(!pEntry->fInElse);
2483
2484#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2485 /* Writeback any dirty shadow registers. */
2486 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2487 * in one of the branches and leave guest registers already dirty before the start of the if
2488 * block alone. */
2489 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2490#endif
2491
2492 /* Jump to the endif */
2493 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
2494
2495 /* Define the else label and enter the else part of the condition. */
2496 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2497 pEntry->fInElse = true;
2498
2499#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2500 Assert(pReNative->Core.offPc == 0);
2501#endif
2502
2503 /* Snapshot the core state so we can do a merge at the endif and restore
2504 the snapshot we took at the start of the if-block. */
2505 pEntry->IfFinalState = pReNative->Core;
2506 pReNative->Core = pEntry->InitialState;
2507
2508 return off;
2509}
2510
2511
2512#define IEM_MC_ENDIF() } while (0); \
2513 off = iemNativeEmitEndIf(pReNative, off)
2514
2515/** Emits code related to IEM_MC_ENDIF. */
2516DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2517{
2518 /* Check sanity and get the conditional stack entry. */
2519 Assert(off != UINT32_MAX);
2520 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2521 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2522
2523#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2524 Assert(pReNative->Core.offPc == 0);
2525#endif
2526#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2527 /* Writeback any dirty shadow registers (else branch). */
2528 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2529 * in one of the branches and leave guest registers already dirty before the start of the if
2530 * block alone. */
2531 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2532#endif
2533
2534 /*
2535 * Now we have to find common ground with the core state at the end of the
2536 * if-block. Use the smallest common denominator and just drop anything
2537 * that isn't the same in both states.
2538 */
2539 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
2540 * which is why we're doing this at the end of the else-block.
2541 * But we'd need more info about future for that to be worth the effort. */
2542 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
2543#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2544 Assert( pOther->bmGstRegShadowDirty == 0
2545 && pReNative->Core.bmGstRegShadowDirty == 0);
2546#endif
2547
2548 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
2549 {
2550 /* shadow guest stuff first. */
2551 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
2552 if (fGstRegs)
2553 {
2554 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
2555 do
2556 {
2557 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2558 fGstRegs &= ~RT_BIT_64(idxGstReg);
2559
2560 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
2561 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
2562 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
2563 {
2564 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
2565 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
2566
2567#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2568 /* Writeback any dirty shadow registers we are about to unshadow. */
2569 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
2570#endif
2571 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
2572 }
2573 } while (fGstRegs);
2574 }
2575 else
2576 {
2577 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
2578#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2579 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
2580#endif
2581 }
2582
2583 /* Check variables next. For now we must require them to be identical
2584 or stuff we can recreate. */
2585 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
2586 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
2587 if (fVars)
2588 {
2589 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
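 /* Variables present in only one of the two states (the XOR above) cannot be kept:
    those living in the current state are dropped below, those only in the other
    state need no action here. */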
2590 do
2591 {
2592 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
2593 fVars &= ~RT_BIT_32(idxVar);
2594
2595 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
2596 {
2597 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
2598 continue;
2599 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2600 {
2601 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2602 if (idxHstReg != UINT8_MAX)
2603 {
2604 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2605 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2606 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
2607 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2608 }
2609 continue;
2610 }
2611 }
2612 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
2613 continue;
2614
2615 /* Irreconcilable, so drop it. */
2616 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2617 if (idxHstReg != UINT8_MAX)
2618 {
2619 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2620 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2621 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
2622 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2623 }
2624 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2625 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
2626 } while (fVars);
2627 }
2628
2629 /* Finally, check that the host register allocations matches. */
2630 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
2631 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
2632 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
2633 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
2634 }
2635
2636 /*
2637 * Define the endif label and maybe the else one if we're still in the 'if' part.
2638 */
2639 if (!pEntry->fInElse)
2640 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2641 else
2642 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
2643 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
2644
2645 /* Pop the conditional stack. */
2646 pReNative->cCondDepth -= 1;
2647
2648 return off;
2649}
2650
2651
2652#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
2653 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
2654 do {
2655
2656/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
2657DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2658{
2659 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2660 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2661
2662 /* Get the eflags. */
2663 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2664 kIemNativeGstRegUse_ReadOnly);
2665
2666 /* Test and jump. */
2667 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2668
2669 /* Free but don't flush the EFlags register. */
2670 iemNativeRegFreeTmp(pReNative, idxEflReg);
2671
2672 /* Make a copy of the core state now as we start the if-block. */
2673 iemNativeCondStartIfBlock(pReNative, off);
2674
2675 return off;
2676}
2677
2678
2679#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
2680 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
2681 do {
2682
2683/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
2684DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2685{
2686 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2687 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2688
2689 /* Get the eflags. */
2690 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2691 kIemNativeGstRegUse_ReadOnly);
2692
2693 /* Test and jump. */
2694 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2695
2696 /* Free but don't flush the EFlags register. */
2697 iemNativeRegFreeTmp(pReNative, idxEflReg);
2698
2699 /* Make a copy of the core state now as we start the if-block. */
2700 iemNativeCondStartIfBlock(pReNative, off);
2701
2702 return off;
2703}
2704
2705
2706#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
2707 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
2708 do {
2709
2710/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
2711DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2712{
2713 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2714 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2715
2716 /* Get the eflags. */
2717 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2718 kIemNativeGstRegUse_ReadOnly);
2719
2720 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2721 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2722
2723 /* Test and jump. */
2724 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2725
2726 /* Free but don't flush the EFlags register. */
2727 iemNativeRegFreeTmp(pReNative, idxEflReg);
2728
2729 /* Make a copy of the core state now as we start the if-block. */
2730 iemNativeCondStartIfBlock(pReNative, off);
2731
2732 return off;
2733}
2734
2735
2736#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
2737 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
2738 do {
2739
2740/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
2741DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2742{
2743 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2744 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2745
2746 /* Get the eflags. */
2747 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2748 kIemNativeGstRegUse_ReadOnly);
2749
2750 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2751 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2752
2753 /* Test and jump. */
2754 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2755
2756 /* Free but don't flush the EFlags register. */
2757 iemNativeRegFreeTmp(pReNative, idxEflReg);
2758
2759 /* Make a copy of the core state now as we start the if-block. */
2760 iemNativeCondStartIfBlock(pReNative, off);
2761
2762 return off;
2763}
2764
2765
2766#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
2767 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
2768 do {
2769
2770#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
2771 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
2772 do {
2773
2774/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
2775DECL_INLINE_THROW(uint32_t)
2776iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2777 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2778{
2779 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
2780 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2781
2782 /* Get the eflags. */
2783 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2784 kIemNativeGstRegUse_ReadOnly);
2785
2786 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2787 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2788
2789 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2790 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2791 Assert(iBitNo1 != iBitNo2);
2792
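 /* The trick used on both architectures (sketch): isolate bit #1, shift it onto
  * bit #2's position and XOR the result with EFLAGS, so that bit iBitNo2 of the
  * temporary ends up as (bit1 ^ bit2). The bit test at the end then branches on
  * whether the two flags differ. */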
2793#ifdef RT_ARCH_AMD64
2794 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
2795
2796 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2797 if (iBitNo1 > iBitNo2)
2798 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2799 else
2800 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2801 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2802
2803#elif defined(RT_ARCH_ARM64)
2804 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2805 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2806
2807 /* and tmpreg, eflreg, #1<<iBitNo1 */
2808 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2809
2810 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2811 if (iBitNo1 > iBitNo2)
2812 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2813 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2814 else
2815 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2816 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2817
2818 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2819
2820#else
2821# error "Port me"
2822#endif
2823
2824 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
2825 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2826 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2827
2828 /* Free but don't flush the EFlags and tmp registers. */
2829 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2830 iemNativeRegFreeTmp(pReNative, idxEflReg);
2831
2832 /* Make a copy of the core state now as we start the if-block. */
2833 iemNativeCondStartIfBlock(pReNative, off);
2834
2835 return off;
2836}
2837
2838
2839#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
2840 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
2841 do {
2842
2843#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
2844 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
2845 do {
2846
2847/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
2848 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
2849DECL_INLINE_THROW(uint32_t)
2850iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
2851 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2852{
2853 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
2854 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2855
2856 /* We need an if-block label for the inverted variant. */
2857 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
2858 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
2859
2860 /* Get the eflags. */
2861 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2862 kIemNativeGstRegUse_ReadOnly);
2863
2864 /* Translate the flag masks to bit numbers. */
2865 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2866 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2867
2868 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2869 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2870 Assert(iBitNo1 != iBitNo);
2871
2872 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2873 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2874 Assert(iBitNo2 != iBitNo);
2875 Assert(iBitNo2 != iBitNo1);
2876
2877#ifdef RT_ARCH_AMD64
2878 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
2879#elif defined(RT_ARCH_ARM64)
2880 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2881#endif
2882
2883 /* Check for the lone bit first. */
2884 if (!fInverted)
2885 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2886 else
2887 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
2888
2889 /* Then extract and compare the other two bits. */
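 /* This is the same isolate+shift+XOR trick as in iemNativeEmitIfEflagsTwoBitsEqual
    above; bit iBitNo2 of the temporary ends up as (bit1 ^ bit2). */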
2890#ifdef RT_ARCH_AMD64
2891 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2892 if (iBitNo1 > iBitNo2)
2893 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2894 else
2895 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2896 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2897
2898#elif defined(RT_ARCH_ARM64)
2899 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2900
2901 /* and tmpreg, eflreg, #1<<iBitNo1 */
2902 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2903
2904 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2905 if (iBitNo1 > iBitNo2)
2906 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2907 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2908 else
2909 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2910 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2911
2912 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2913
2914#else
2915# error "Port me"
2916#endif
2917
2918 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
2919 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2920 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2921
2922 /* Free but don't flush the EFlags and tmp registers. */
2923 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2924 iemNativeRegFreeTmp(pReNative, idxEflReg);
2925
2926 /* Make a copy of the core state now as we start the if-block. */
2927 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
2928
2929 return off;
2930}
2931
2932
2933#define IEM_MC_IF_CX_IS_NZ() \
2934 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
2935 do {
2936
2937/** Emits code for IEM_MC_IF_CX_IS_NZ. */
2938DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2939{
2940 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2941
2942 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2943 kIemNativeGstRegUse_ReadOnly);
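 /* Only the low 16 bits (CX) are relevant, hence the UINT16_MAX mask in the test below. */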
2944 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
2945 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2946
2947 iemNativeCondStartIfBlock(pReNative, off);
2948 return off;
2949}
2950
2951
2952#define IEM_MC_IF_ECX_IS_NZ() \
2953 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
2954 do {
2955
2956#define IEM_MC_IF_RCX_IS_NZ() \
2957 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
2958 do {
2959
2960/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
2961DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
2962{
2963 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2964
2965 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2966 kIemNativeGstRegUse_ReadOnly);
2967 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
2968 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2969
2970 iemNativeCondStartIfBlock(pReNative, off);
2971 return off;
2972}
2973
2974
2975#define IEM_MC_IF_CX_IS_NOT_ONE() \
2976 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
2977 do {
2978
2979/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
2980DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2981{
2982 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2983
2984 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2985 kIemNativeGstRegUse_ReadOnly);
2986#ifdef RT_ARCH_AMD64
2987 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2988#else
2989 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2990 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
2991 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2992#endif
2993 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2994
2995 iemNativeCondStartIfBlock(pReNative, off);
2996 return off;
2997}
2998
2999
3000#define IEM_MC_IF_ECX_IS_NOT_ONE() \
3001 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
3002 do {
3003
3004#define IEM_MC_IF_RCX_IS_NOT_ONE() \
3005 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
3006 do {
3007
3008/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
3009DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3010{
3011 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3012
3013 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3014 kIemNativeGstRegUse_ReadOnly);
3015 if (f64Bit)
3016 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3017 else
3018 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3019 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3020
3021 iemNativeCondStartIfBlock(pReNative, off);
3022 return off;
3023}
3024
3025
3026#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3027 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
3028 do {
3029
3030#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3031 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
3032 do {
3033
3034/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3035 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3036DECL_INLINE_THROW(uint32_t)
3037iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
3038{
3039 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3040 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3041
3042 /* We have to load both RCX and EFLAGS before we can start branching,
3043 otherwise we'll end up in the else-block with an inconsistent
3044 register allocator state.
3045 Doing EFLAGS first as it's more likely to be loaded, right? */
3046 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3047 kIemNativeGstRegUse_ReadOnly);
3048 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3049 kIemNativeGstRegUse_ReadOnly);
3050
3051 /** @todo we could reduce this to a single branch instruction by spending a
3052 * temporary register and some setnz stuff. Not sure if loops are
3053 * worth it. */
3054 /* Check CX. */
3055#ifdef RT_ARCH_AMD64
3056 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3057#else
3058 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3059 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3060 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3061#endif
3062
3063 /* Check the EFlags bit. */
3064 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3065 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3066 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3067 !fCheckIfSet /*fJmpIfSet*/);
3068
3069 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3070 iemNativeRegFreeTmp(pReNative, idxEflReg);
3071
3072 iemNativeCondStartIfBlock(pReNative, off);
3073 return off;
3074}
3075
3076
3077#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3078 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
3079 do {
3080
3081#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3082 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
3083 do {
3084
3085#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3086 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
3087 do {
3088
3089#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3090 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
3091 do {
3092
3093/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3094 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3095 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3096 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3097DECL_INLINE_THROW(uint32_t)
3098iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3099 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
3100{
3101 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3102 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3103
3104 /* We have to load both RCX and EFLAGS before we can start branching,
3105 otherwise we'll end up in the else-block with an inconsistent
3106 register allocator state.
3107 Doing EFLAGS first as it's more likely to be loaded, right? */
3108 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3109 kIemNativeGstRegUse_ReadOnly);
3110 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3111 kIemNativeGstRegUse_ReadOnly);
3112
3113 /** @todo we could reduce this to a single branch instruction by spending a
3114 * temporary register and some setnz stuff. Not sure if loops are
3115 * worth it. */
3116 /* Check RCX/ECX. */
3117 if (f64Bit)
3118 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3119 else
3120 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3121
3122 /* Check the EFlags bit. */
3123 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3124 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3125 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3126 !fCheckIfSet /*fJmpIfSet*/);
3127
3128 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3129 iemNativeRegFreeTmp(pReNative, idxEflReg);
3130
3131 iemNativeCondStartIfBlock(pReNative, off);
3132 return off;
3133}
3134
3135
3136#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3137 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3138 do {
3139
3140/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3141DECL_INLINE_THROW(uint32_t)
3142iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3143{
3144 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3145
3146 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3147 PIEMNATIVEVAR const pVarLocal = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3148 AssertStmt(pVarLocal->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3149 AssertStmt(pVarLocal->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3150
3151 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3152
3153 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3154
3155 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3156
3157 iemNativeCondStartIfBlock(pReNative, off);
3158 return off;
3159}
3160
3161
3162#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3163 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3164 do {
3165
3166/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3167DECL_INLINE_THROW(uint32_t)
3168iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3169{
3170 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3171 Assert(iGReg < 16);
3172
3173 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3174 kIemNativeGstRegUse_ReadOnly);
3175
3176 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3177
3178 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3179
3180 iemNativeCondStartIfBlock(pReNative, off);
3181 return off;
3182}
3183
3184
3185
3186/*********************************************************************************************************************************
3187* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3188*********************************************************************************************************************************/
3189
3190#define IEM_MC_NOREF(a_Name) \
3191 RT_NOREF_PV(a_Name)
3192
3193#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3194 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3195
3196#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3197 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3198
3199#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3200 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3201
3202#define IEM_MC_LOCAL(a_Type, a_Name) \
3203 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3204
3205#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3206 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3207
3208#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3209 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
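/* Note: in the native recompiler these MC 'variables' are uint8_t handles (packed
 * variable indices); the actual storage, a host register or a stack slot, is
 * managed by the variable allocator. */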
3210
3211
3212/**
3213 * Sets the host register for @a idxVar to @a idxReg.
3214 *
3215 * The register must not be allocated. Any guest register shadowing will be
3216 * implicitly dropped by this call.
3217 *
3218 * The variable must not have any register associated with it (causes
3219 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3220 * implied.
3221 *
3222 * @returns idxReg
3223 * @param pReNative The recompiler state.
3224 * @param idxVar The variable.
3225 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3226 * @param off For recording in debug info.
3227 *
3228 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3229 */
3230DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
3231{
3232 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3233 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3234 Assert(!pVar->fRegAcquired);
3235 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3236 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3237 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3238
3239 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3240 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3241
3242 iemNativeVarSetKindToStack(pReNative, idxVar);
3243 pVar->idxReg = idxReg;
3244
3245 return idxReg;
3246}
3247
3248
3249/**
3250 * A convenience wrapper around iemNativeVarRegisterSet() that also marks the register as acquired.
3251 */
3252DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3253 uint8_t idxReg, uint32_t *poff)
3254{
3255 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
3256 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3257 return idxReg;
3258}
3259
3260
3261/**
3262 * This is called by IEM_MC_END() to clean up all variables.
3263 */
3264DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3265{
3266 uint32_t const bmVars = pReNative->Core.bmVars;
3267 if (bmVars != 0)
3268 iemNativeVarFreeAllSlow(pReNative, bmVars);
3269 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3270 Assert(pReNative->Core.bmStack == 0);
3271}
3272
3273
3274#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
3275
3276/**
3277 * This is called by IEM_MC_FREE_LOCAL.
3278 */
3279DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3280{
3281 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3282 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
3283 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3284}
3285
3286
3287#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
3288
3289/**
3290 * This is called by IEM_MC_FREE_ARG.
3291 */
3292DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3293{
3294 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3295 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
3296 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3297}
3298
3299
3300#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
3301
3302/**
3303 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
3304 */
3305DECL_INLINE_THROW(uint32_t)
3306iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
3307{
3308 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
3309 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
3310 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3311 Assert( pVarDst->cbVar == sizeof(uint16_t)
3312 || pVarDst->cbVar == sizeof(uint32_t));
3313
3314 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
3315 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
3316 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
3317 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
3318 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3319
3320 Assert(pVarDst->cbVar < pVarSrc->cbVar);
3321
3322 /*
3323 * Special case for immediates.
3324 */
3325 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
3326 {
3327 switch (pVarDst->cbVar)
3328 {
3329 case sizeof(uint16_t):
3330 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
3331 break;
3332 case sizeof(uint32_t):
3333 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
3334 break;
3335 default: AssertFailed(); break;
3336 }
3337 }
3338 else
3339 {
3340 /*
3341 * The generic solution for now.
3342 */
3343 /** @todo optimize this by having the python script make sure the source
3344 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
3345 * statement. Then we could just transfer the register assignments. */
3346 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
3347 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
3348 switch (pVarDst->cbVar)
3349 {
3350 case sizeof(uint16_t):
3351 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
3352 break;
3353 case sizeof(uint32_t):
3354 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
3355 break;
3356 default: AssertFailed(); break;
3357 }
3358 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
3359 iemNativeVarRegisterRelease(pReNative, idxVarDst);
3360 }
3361 return off;
3362}
3363
3364
3365
3366/*********************************************************************************************************************************
3367* Emitters for IEM_MC_CALL_CIMPL_XXX *
3368*********************************************************************************************************************************/
3369
3370/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
3371DECL_INLINE_THROW(uint32_t)
3372iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
3373 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
3374
3375{
3376 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
3377
3378#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3379 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
3380 when a call clobbers any of the relevant control registers. */
3381# if 1
3382 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
3383 {
3384 /* Likely as long as call+ret are done via cimpl. */
3385 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
3386 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
3387 }
3388 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
3389 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3390 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
3391 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3392 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
3393 else
3394 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3395 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3396 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3397
3398# else
3399 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
3400 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3401 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
3402 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3403 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
3404 || pfnCImpl == (uintptr_t)iemCImpl_callf
3405 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
3406 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
3407 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3408 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3409 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3410# endif
3411#endif
3412
3413 /*
3414 * Do all the call setup and cleanup.
3415 */
3416 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
3417
3418 /*
3419 * Load the two or three hidden arguments.
3420 */
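 /* That is: pVCpu and cbInstr, plus, on Windows/AMD64 with strict status codes
    enabled, a pointer to a stack slot used to return the strict status code
    (rcStrict) which is reloaded after the call below. */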
3421#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3422 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3423 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3424 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
3425#else
3426 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3427 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
3428#endif
3429
3430 /*
3431 * Make the call and check the return code.
3432 *
3433 * Shadow PC copies are always flushed here, other stuff depends on flags.
3434 * Segment and general purpose registers are explicitly flushed via the
3435 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
3436 * macros.
3437 */
3438 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
3439#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3440 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3441#endif
3442 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
3443 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
3444 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
3445 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
3446
3447 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3448}
3449
3450
3451#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
3452 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
3453
3454/** Emits code for IEM_MC_CALL_CIMPL_1. */
3455DECL_INLINE_THROW(uint32_t)
3456iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3457 uintptr_t pfnCImpl, uint8_t idxArg0)
3458{
3459 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3460 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
3461}
3462
3463
3464#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
3465 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
3466
3467/** Emits code for IEM_MC_CALL_CIMPL_2. */
3468DECL_INLINE_THROW(uint32_t)
3469iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3470 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
3471{
3472 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3473 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3474 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
3475}
3476
3477
3478#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
3479 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3480 (uintptr_t)a_pfnCImpl, a0, a1, a2)
3481
3482/** Emits code for IEM_MC_CALL_CIMPL_3. */
3483DECL_INLINE_THROW(uint32_t)
3484iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3485 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3486{
3487 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3488 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3489 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3490 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
3491}
3492
3493
3494#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
3495 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3496 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
3497
3498/** Emits code for IEM_MC_CALL_CIMPL_4. */
3499DECL_INLINE_THROW(uint32_t)
3500iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3501 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3502{
3503 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3504 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3505 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3506 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3507 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
3508}
3509
3510
3511#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
3512 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3513 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
3514
3515/** Emits code for IEM_MC_CALL_CIMPL_5. */
3516DECL_INLINE_THROW(uint32_t)
3517iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3518 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
3519{
3520 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3521 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3522 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3523 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3524 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
3525 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
3526}
3527
3528
3529/** Recompiler debugging: Flush guest register shadow copies. */
3530#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
3531
3532
3533
3534/*********************************************************************************************************************************
3535* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
3536*********************************************************************************************************************************/
3537
3538/**
3539 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
3540 */
3541DECL_INLINE_THROW(uint32_t)
3542iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3543 uintptr_t pfnAImpl, uint8_t cArgs)
3544{
3545 if (idxVarRc != UINT8_MAX)
3546 {
3547 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
3548 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
3549 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3550 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3551 }
3552
3553 /*
3554 * Do all the call setup and cleanup.
3555 *
3556 * It is only required to flush pending guest register writes in call volatile registers as
3557 * assembly helpers can't throw and don't access anything living in CPUMCTX; they only
3558 * access their parameters. The flushing of call volatile registers is always done in iemNativeEmitCallCommon()
3559 * no matter the fFlushPendingWrites parameter.
3560 */
3561 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
3562
3563 /*
3564 * Make the call and update the return code variable if we've got one.
3565 */
3566 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
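 /* Rather than copying the result, the return register (IEMNATIVE_CALL_RET_GREG) is
    bound directly to the RC variable via iemNativeVarRegisterSet() below. */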
3567 if (idxVarRc != UINT8_MAX)
3568 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
3569
3570 return off;
3571}
3572
3573
3574
3575#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
3576 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
3577
3578#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
3579 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
3580
3581/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
3582DECL_INLINE_THROW(uint32_t)
3583iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
3584{
3585 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
3586}
3587
3588
3589#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
3590 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
3591
3592#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
3593 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
3594
3595/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
3596DECL_INLINE_THROW(uint32_t)
3597iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
3598{
3599 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3600 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
3601}
3602
3603
3604#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
3605 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
3606
3607#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
3608 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
3609
3610/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
3611DECL_INLINE_THROW(uint32_t)
3612iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3613 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
3614{
3615 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3616 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3617 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
3618}
3619
3620
3621#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
3622 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
3623
3624#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
3625 IEM_MC_LOCAL(a_rcType, a_rc); \
3626 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
3627
3628/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
3629DECL_INLINE_THROW(uint32_t)
3630iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3631 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3632{
3633 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3634 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3635 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3636 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
3637}
3638
3639
3640#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
3641 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3642
3643#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
3644 IEM_MC_LOCAL(a_rcType, a_rc); \
3645 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3646
3647/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
3648DECL_INLINE_THROW(uint32_t)
3649iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3650 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3651{
3652 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3653 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3654 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3655 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
3656 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
3657}
3658
3659
3660
3661/*********************************************************************************************************************************
3662* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
3663*********************************************************************************************************************************/
3664
3665#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
3666 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
3667
3668#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3669 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
3670
3671#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3672 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
3673
3674#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3675 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
3676
3677
3678/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
3679 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
3680DECL_INLINE_THROW(uint32_t)
3681iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
3682{
3683 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3684 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3685 Assert(iGRegEx < 20);
3686
3687 /* Same discussion as in iemNativeEmitFetchGregU16 */
3688 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3689 kIemNativeGstRegUse_ReadOnly);
3690
3691 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3692 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3693
3694 /* The value is zero-extended to the full 64-bit host register width. */
3695 if (iGRegEx < 16)
3696 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3697 else
3698 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3699
3700 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3701 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3702 return off;
3703}
3704
3705
3706#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3707 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
3708
3709#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3710 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
3711
3712#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3713 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
3714
3715/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
3716DECL_INLINE_THROW(uint32_t)
3717iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
3718{
3719 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3720 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3721 Assert(iGRegEx < 20);
3722
3723 /* Same discussion as in iemNativeEmitFetchGregU16 */
3724 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3725 kIemNativeGstRegUse_ReadOnly);
3726
3727 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3728 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3729
3730 if (iGRegEx < 16)
3731 {
3732 switch (cbSignExtended)
3733 {
3734 case sizeof(uint16_t):
3735 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3736 break;
3737 case sizeof(uint32_t):
3738 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3739 break;
3740 case sizeof(uint64_t):
3741 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3742 break;
3743 default: AssertFailed(); break;
3744 }
3745 }
3746 else
3747 {
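        /* AH/CH/DH/BH: move bits 15:8 down to bits 7:0 first, then sign-extend the byte in place. */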
3748 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3749 switch (cbSignExtended)
3750 {
3751 case sizeof(uint16_t):
3752 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3753 break;
3754 case sizeof(uint32_t):
3755 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3756 break;
3757 case sizeof(uint64_t):
3758 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3759 break;
3760 default: AssertFailed(); break;
3761 }
3762 }
3763
3764 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3765 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3766 return off;
3767}
3768
3769
3770
3771#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
3772 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
3773
3774#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
3775 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3776
3777#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
3778 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3779
3780/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
3781DECL_INLINE_THROW(uint32_t)
3782iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3783{
3784 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3785 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3786 Assert(iGReg < 16);
3787
3788 /*
3789     * We can either just load the low 16 bits of the GPR into a host register
3790 * for the variable, or we can do so via a shadow copy host register. The
3791 * latter will avoid having to reload it if it's being stored later, but
3792 * will waste a host register if it isn't touched again. Since we don't
3793     * know what's going to happen, we choose the latter for now.
3794 */
3795 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3796 kIemNativeGstRegUse_ReadOnly);
3797
3798 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3799 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3800 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3801 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3802
3803 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3804 return off;
3805}
3806
3807
3808#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
3809 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3810
3811#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
3812 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3813
3814/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
3815DECL_INLINE_THROW(uint32_t)
3816iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
3817{
3818 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3819 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3820 Assert(iGReg < 16);
3821
3822 /*
3823     * We can either just load the low 16 bits of the GPR into a host register
3824 * for the variable, or we can do so via a shadow copy host register. The
3825 * latter will avoid having to reload it if it's being stored later, but
3826 * will waste a host register if it isn't touched again. Since we don't
3827     * know what's going to happen, we choose the latter for now.
3828 */
3829 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3830 kIemNativeGstRegUse_ReadOnly);
3831
3832 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3833 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3834 if (cbSignExtended == sizeof(uint32_t))
3835 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3836 else
3837 {
3838 Assert(cbSignExtended == sizeof(uint64_t));
3839 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3840 }
3841 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3842
3843 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3844 return off;
3845}
3846
3847
3848#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
3849 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
3850
3851#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
3852 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
3853
3854/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
3855DECL_INLINE_THROW(uint32_t)
3856iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3857{
3858 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3859 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3860 Assert(iGReg < 16);
3861
3862 /*
3863     * We can either just load the low 32 bits of the GPR into a host register
3864 * for the variable, or we can do so via a shadow copy host register. The
3865 * latter will avoid having to reload it if it's being stored later, but
3866 * will waste a host register if it isn't touched again. Since we don't
3867     * know what's going to happen, we choose the latter for now.
3868 */
3869 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3870 kIemNativeGstRegUse_ReadOnly);
3871
3872 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3873 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3874 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3875 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3876
3877 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3878 return off;
3879}
3880
3881
3882#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
3883 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
3884
3885/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
3886DECL_INLINE_THROW(uint32_t)
3887iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3888{
3889 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3890 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
3891 Assert(iGReg < 16);
3892
3893 /*
3894     * We can either just load the low 32 bits of the GPR into a host register
3895 * for the variable, or we can do so via a shadow copy host register. The
3896 * latter will avoid having to reload it if it's being stored later, but
3897 * will waste a host register if it isn't touched again. Since we don't
3898     * know what's going to happen, we choose the latter for now.
3899 */
3900 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3901 kIemNativeGstRegUse_ReadOnly);
3902
3903 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3904 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3905 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3906 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3907
3908 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3909 return off;
3910}
3911
3912
3913#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
3914 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
3915
3916#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
3917 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
3918
3919/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
3920 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
3921DECL_INLINE_THROW(uint32_t)
3922iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3923{
3924 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3925 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
3926 Assert(iGReg < 16);
3927
3928 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3929 kIemNativeGstRegUse_ReadOnly);
3930
3931 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3932 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3933 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
3934 /** @todo name the register a shadow one already? */
3935 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3936
3937 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3938 return off;
3939}
3940
3941
3942#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3943#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
3944 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
3945
3946/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
3947DECL_INLINE_THROW(uint32_t)
3948iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
3949{
3950 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3951 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
3952 Assert(iGRegLo < 16 && iGRegHi < 16);
3953
3954 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
3955 kIemNativeGstRegUse_ReadOnly);
3956 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
3957 kIemNativeGstRegUse_ReadOnly);
3958
3959 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3960 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
3961 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
3962 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
3963
3964 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
3965 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
3966 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
3967 return off;
3968}
3969#endif
3970
3971
3972/*********************************************************************************************************************************
3973* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
3974*********************************************************************************************************************************/
3975
3976#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
3977 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
3978
3979/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
3980DECL_INLINE_THROW(uint32_t)
3981iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
3982{
3983 Assert(iGRegEx < 20);
3984 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3985 kIemNativeGstRegUse_ForUpdate);
3986#ifdef RT_ARCH_AMD64
3987 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
3988
3989 /* To the lowest byte of the register: mov r8, imm8 */
3990 if (iGRegEx < 16)
3991 {
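        /* Note: host registers 4..7 need an (empty) REX prefix so the encoding selects SPL/BPL/SIL/DIL rather than AH/CH/DH/BH. */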
3992 if (idxGstTmpReg >= 8)
3993 pbCodeBuf[off++] = X86_OP_REX_B;
3994 else if (idxGstTmpReg >= 4)
3995 pbCodeBuf[off++] = X86_OP_REX;
3996 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
3997 pbCodeBuf[off++] = u8Value;
3998 }
3999    /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
4000 else if (idxGstTmpReg < 4)
4001 {
4002 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4003 pbCodeBuf[off++] = u8Value;
4004 }
4005 else
4006 {
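        /* Bits 15:8 of this host register have no direct byte encoding (that would require AH/CH/DH/BH, which aren't available here), so rotate the register, patch the low byte and rotate back. */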
4007 /* ror reg64, 8 */
4008 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4009 pbCodeBuf[off++] = 0xc1;
4010 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4011 pbCodeBuf[off++] = 8;
4012
4013 /* mov reg8, imm8 */
4014 if (idxGstTmpReg >= 8)
4015 pbCodeBuf[off++] = X86_OP_REX_B;
4016 else if (idxGstTmpReg >= 4)
4017 pbCodeBuf[off++] = X86_OP_REX;
4018 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4019 pbCodeBuf[off++] = u8Value;
4020
4021 /* rol reg64, 8 */
4022 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4023 pbCodeBuf[off++] = 0xc1;
4024 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4025 pbCodeBuf[off++] = 8;
4026 }
4027
4028#elif defined(RT_ARCH_ARM64)
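    /* BFI has no immediate form, so the value is first materialized in a temporary register. */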
4029 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4030 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4031 if (iGRegEx < 16)
4032 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4033 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4034 else
4035 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4036 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4037 iemNativeRegFreeTmp(pReNative, idxImmReg);
4038
4039#else
4040# error "Port me!"
4041#endif
4042
4043 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4044
4045#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4046 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4047#endif
4048
4049 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4050 return off;
4051}
4052
4053
4054#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4055 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4056
4057/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4058DECL_INLINE_THROW(uint32_t)
4059iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4060{
4061 Assert(iGRegEx < 20);
4062 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4063
4064 /*
4065     * If it's a constant value (unlikely), we treat this as an
4066 * IEM_MC_STORE_GREG_U8_CONST statement.
4067 */
4068 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4069 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4070 { /* likely */ }
4071 else
4072 {
4073 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4074 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4075 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4076 }
4077
4078 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4079 kIemNativeGstRegUse_ForUpdate);
4080 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4081
4082#ifdef RT_ARCH_AMD64
4083 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4084 if (iGRegEx < 16)
4085 {
4086 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4087 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4088 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4089 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4090 pbCodeBuf[off++] = X86_OP_REX;
4091 pbCodeBuf[off++] = 0x8a;
4092 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4093 }
4094    /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
4095 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4096 {
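        /* With no REX prefix, reg values 4..7 encode AH/CH/DH/BH, hence the +4 to target bits 15:8 of the destination. */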
4097 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4098 pbCodeBuf[off++] = 0x8a;
4099 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4100 }
4101 else
4102 {
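        /* Same rotate-patch-rotate trick as in iemNativeEmitStoreGregU8Const: the high-byte encodings are unavailable once a REX prefix is required. */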
4103 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4104
4105 /* ror reg64, 8 */
4106 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4107 pbCodeBuf[off++] = 0xc1;
4108 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4109 pbCodeBuf[off++] = 8;
4110
4111 /* mov reg8, reg8(r/m) */
4112 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4113 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4114 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4115 pbCodeBuf[off++] = X86_OP_REX;
4116 pbCodeBuf[off++] = 0x8a;
4117 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4118
4119 /* rol reg64, 8 */
4120 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4121 pbCodeBuf[off++] = 0xc1;
4122 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4123 pbCodeBuf[off++] = 8;
4124 }
4125
4126#elif defined(RT_ARCH_ARM64)
4127 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4128 or
4129 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4130 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4131 if (iGRegEx < 16)
4132 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4133 else
4134 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4135
4136#else
4137# error "Port me!"
4138#endif
4139 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4140
4141 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4142
4143#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4144 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4145#endif
4146 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4147 return off;
4148}
4149
4150
4151
4152#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4153 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4154
4155/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4156DECL_INLINE_THROW(uint32_t)
4157iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4158{
4159 Assert(iGReg < 16);
4160 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4161 kIemNativeGstRegUse_ForUpdate);
4162#ifdef RT_ARCH_AMD64
4163 /* mov reg16, imm16 */
4164 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4165 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4166 if (idxGstTmpReg >= 8)
4167 pbCodeBuf[off++] = X86_OP_REX_B;
4168 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4169 pbCodeBuf[off++] = RT_BYTE1(uValue);
4170 pbCodeBuf[off++] = RT_BYTE2(uValue);
4171
4172#elif defined(RT_ARCH_ARM64)
4173 /* movk xdst, #uValue, lsl #0 */
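    /* MOVK replaces only bits 15:0 of the 64-bit register, preserving the upper bits as a 16-bit GPR store requires. */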
4174 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4175 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4176
4177#else
4178# error "Port me!"
4179#endif
4180
4181 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4182
4183#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4184 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4185#endif
4186 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4187 return off;
4188}
4189
4190
4191#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4192 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4193
4194/** Emits code for IEM_MC_STORE_GREG_U16. */
4195DECL_INLINE_THROW(uint32_t)
4196iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4197{
4198 Assert(iGReg < 16);
4199 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4200
4201 /*
4202     * If it's a constant value (unlikely), we treat this as an
4203 * IEM_MC_STORE_GREG_U16_CONST statement.
4204 */
4205 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4206 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4207 { /* likely */ }
4208 else
4209 {
4210 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4211 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4212 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4213 }
4214
4215 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4216 kIemNativeGstRegUse_ForUpdate);
4217
4218#ifdef RT_ARCH_AMD64
4219 /* mov reg16, reg16 or [mem16] */
4220 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4221 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4222 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4223 {
4224 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4225 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
4226 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
4227 pbCodeBuf[off++] = 0x8b;
4228 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
4229 }
4230 else
4231 {
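        /* The value isn't in a host register, so load it straight from its stack slot (frame pointer relative). */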
4232 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
4233 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
4234 if (idxGstTmpReg >= 8)
4235 pbCodeBuf[off++] = X86_OP_REX_R;
4236 pbCodeBuf[off++] = 0x8b;
4237 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
4238 }
4239
4240#elif defined(RT_ARCH_ARM64)
4241 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
4242 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4243 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4244 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
4245 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4246
4247#else
4248# error "Port me!"
4249#endif
4250
4251 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4252
4253#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4254 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4255#endif
4256 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4257 return off;
4258}
4259
4260
4261#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
4262 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
4263
4264/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
4265DECL_INLINE_THROW(uint32_t)
4266iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
4267{
4268 Assert(iGReg < 16);
4269 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4270 kIemNativeGstRegUse_ForFullWrite);
4271 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4272#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4273 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4274#endif
4275 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4276 return off;
4277}
4278
4279
4280#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
4281 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
4282
4283/** Emits code for IEM_MC_STORE_GREG_U32. */
4284DECL_INLINE_THROW(uint32_t)
4285iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4286{
4287 Assert(iGReg < 16);
4288 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4289
4290 /*
4291     * If it's a constant value (unlikely), we treat this as an
4292 * IEM_MC_STORE_GREG_U32_CONST statement.
4293 */
4294 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4295 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4296 { /* likely */ }
4297 else
4298 {
4299 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4300 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4301 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
4302 }
4303
4304 /*
4305     * For the rest we allocate a guest register for the variable and write
4306 * it to the CPUMCTX structure.
4307 */
4308 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4309#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4310 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4311#else
4312 RT_NOREF(idxVarReg);
4313#endif
4314#ifdef VBOX_STRICT
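    /* The 32-bit value is expected to be zero-extended to 64 bits in the host register; verify that in strict builds. */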
4315 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
4316#endif
4317 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4318 return off;
4319}
4320
4321
4322#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
4323 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
4324
4325/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
4326DECL_INLINE_THROW(uint32_t)
4327iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
4328{
4329 Assert(iGReg < 16);
4330 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4331 kIemNativeGstRegUse_ForFullWrite);
4332 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4333#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4334 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4335#endif
4336 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4337 return off;
4338}
4339
4340
4341#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
4342 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
4343
4344#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
4345 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
4346
4347/** Emits code for IEM_MC_STORE_GREG_U64 (and the IEM_MC_STORE_GREG_I64 alias). */
4348DECL_INLINE_THROW(uint32_t)
4349iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4350{
4351 Assert(iGReg < 16);
4352 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4353
4354 /*
4355     * If it's a constant value (unlikely), we treat this as an
4356 * IEM_MC_STORE_GREG_U64_CONST statement.
4357 */
4358 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4359 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4360 { /* likely */ }
4361 else
4362 {
4363 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4364 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4365 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
4366 }
4367
4368 /*
4369     * For the rest we allocate a guest register for the variable and write
4370 * it to the CPUMCTX structure.
4371 */
4372 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4373#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4374 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4375#else
4376 RT_NOREF(idxVarReg);
4377#endif
4378 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4379 return off;
4380}
4381
4382
4383#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
4384 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
4385
4386/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
4387DECL_INLINE_THROW(uint32_t)
4388iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
4389{
4390 Assert(iGReg < 16);
4391 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4392 kIemNativeGstRegUse_ForUpdate);
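    /* A 32-bit register-to-register move zero-extends, clearing bits 63:32 as required. */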
4393 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
4394#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4395 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4396#endif
4397 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4398 return off;
4399}
4400
4401
4402#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4403#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
4404 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
4405
4406/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
4407DECL_INLINE_THROW(uint32_t)
4408iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
4409{
4410 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4411 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4412 Assert(iGRegLo < 16 && iGRegHi < 16);
4413
4414 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4415 kIemNativeGstRegUse_ForFullWrite);
4416 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4417 kIemNativeGstRegUse_ForFullWrite);
4418
4419 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4420 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
4421 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
4422 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
4423
4424 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4425 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4426 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4427 return off;
4428}
4429#endif
4430
4431
4432/*********************************************************************************************************************************
4433* General purpose register manipulation (add, sub). *
4434*********************************************************************************************************************************/
4435
4436#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
4437    off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
4438
4439/** Emits code for IEM_MC_ADD_GREG_U16. */
4440DECL_INLINE_THROW(uint32_t)
4441iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
4442{
4443 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4444 kIemNativeGstRegUse_ForUpdate);
4445
4446#ifdef RT_ARCH_AMD64
4447 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4448 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4449 if (idxGstTmpReg >= 8)
4450 pbCodeBuf[off++] = X86_OP_REX_B;
4451 if (uAddend == 1)
4452 {
4453 pbCodeBuf[off++] = 0xff; /* inc */
4454 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4455 }
4456 else
4457 {
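        /* add r/m16, imm16 (0x81 /0) - uAddend fits in 8 bits, so the high immediate byte is zero. */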
4458 pbCodeBuf[off++] = 0x81;
4459 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4460 pbCodeBuf[off++] = uAddend;
4461 pbCodeBuf[off++] = 0;
4462 }
4463
4464#else
4465 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4466 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4467
4468    /* add tmp, gstgrp, uAddend */
4469 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
4470
4471 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
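    /* Only bits 15:0 of the guest register may change (16-bit operand semantics), which is why the add goes through a temporary instead of targeting the guest register directly. */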
4472 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4473
4474 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4475#endif
4476
4477 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4478
4479#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4480 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4481#endif
4482
4483 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4484 return off;
4485}
4486
4487
4488#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
4489 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4490
4491#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
4492 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4493
4494/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
4495DECL_INLINE_THROW(uint32_t)
4496iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
4497{
4498 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4499 kIemNativeGstRegUse_ForUpdate);
4500
4501#ifdef RT_ARCH_AMD64
4502 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4503 if (f64Bit)
4504 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4505 else if (idxGstTmpReg >= 8)
4506 pbCodeBuf[off++] = X86_OP_REX_B;
4507 if (uAddend == 1)
4508 {
4509 pbCodeBuf[off++] = 0xff; /* inc */
4510 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4511 }
4512 else if (uAddend < 128)
4513 {
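        /* add r/m32/64, imm8 (0x83 /0, sign-extended) - safe because uAddend is below 128. */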
4514 pbCodeBuf[off++] = 0x83; /* add */
4515 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4516 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4517 }
4518 else
4519 {
4520 pbCodeBuf[off++] = 0x81; /* add */
4521 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4522 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4523 pbCodeBuf[off++] = 0;
4524 pbCodeBuf[off++] = 0;
4525 pbCodeBuf[off++] = 0;
4526 }
4527
4528#else
4529    /* add gstgrp, gstgrp, uAddend */
4530 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4531 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
4532
4533#endif
4534
4535 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4536
4537#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4538 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4539#endif
4540
4541 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4542 return off;
4543}
4544
4545
4546
4547#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
4548 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
4549
4550/** Emits code for IEM_MC_SUB_GREG_U16. */
4551DECL_INLINE_THROW(uint32_t)
4552iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
4553{
4554 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4555 kIemNativeGstRegUse_ForUpdate);
4556
4557#ifdef RT_ARCH_AMD64
4558 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4559 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4560 if (idxGstTmpReg >= 8)
4561 pbCodeBuf[off++] = X86_OP_REX_B;
4562 if (uSubtrahend == 1)
4563 {
4564 pbCodeBuf[off++] = 0xff; /* dec */
4565 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4566 }
4567 else
4568 {
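        /* sub r/m16, imm16 (0x81 /5) - uSubtrahend fits in 8 bits, so the high immediate byte is zero. */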
4569 pbCodeBuf[off++] = 0x81;
4570 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4571 pbCodeBuf[off++] = uSubtrahend;
4572 pbCodeBuf[off++] = 0;
4573 }
4574
4575#else
4576 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4577 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4578
4579 /* sub tmp, gstgrp, uSubtrahend */
4580 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
4581
4582 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
4583 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4584
4585 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4586#endif
4587
4588 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4589
4590#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4591 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4592#endif
4593
4594 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4595 return off;
4596}
4597
4598
4599#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
4600 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4601
4602#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
4603 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4604
4605/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
4606DECL_INLINE_THROW(uint32_t)
4607iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
4608{
4609 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4610 kIemNativeGstRegUse_ForUpdate);
4611
4612#ifdef RT_ARCH_AMD64
4613 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4614 if (f64Bit)
4615 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4616 else if (idxGstTmpReg >= 8)
4617 pbCodeBuf[off++] = X86_OP_REX_B;
4618 if (uSubtrahend == 1)
4619 {
4620 pbCodeBuf[off++] = 0xff; /* dec */
4621 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4622 }
4623 else if (uSubtrahend < 128)
4624 {
4625 pbCodeBuf[off++] = 0x83; /* sub */
4626 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4627 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4628 }
4629 else
4630 {
4631 pbCodeBuf[off++] = 0x81; /* sub */
4632 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4633 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4634 pbCodeBuf[off++] = 0;
4635 pbCodeBuf[off++] = 0;
4636 pbCodeBuf[off++] = 0;
4637 }
4638
4639#else
4640    /* sub gstgrp, gstgrp, uSubtrahend */
4641 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4642 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
4643
4644#endif
4645
4646 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4647
4648#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4649 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4650#endif
4651
4652 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4653 return off;
4654}
4655
4656
4657#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
4658 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4659
4660#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
4661 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4662
4663#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
4664 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4665
4666#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
4667 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4668
4669/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
4670DECL_INLINE_THROW(uint32_t)
4671iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4672{
4673#ifdef VBOX_STRICT
4674 switch (cbMask)
4675 {
4676 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4677 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4678 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4679 case sizeof(uint64_t): break;
4680 default: AssertFailedBreak();
4681 }
4682#endif
4683
4684 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4685 kIemNativeGstRegUse_ForUpdate);
4686
4687 switch (cbMask)
4688 {
4689 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4690 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
4691 break;
4692 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
4693 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
4694 break;
4695 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4696 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4697 break;
4698 case sizeof(uint64_t):
4699 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
4700 break;
4701 default: AssertFailedBreak();
4702 }
4703
4704 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4705
4706#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4707 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4708#endif
4709
4710 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4711 return off;
4712}
4713
4714
4715#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
4716 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4717
4718#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
4719 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4720
4721#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
4722 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4723
4724#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
4725 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4726
4727/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
4728DECL_INLINE_THROW(uint32_t)
4729iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4730{
4731#ifdef VBOX_STRICT
4732 switch (cbMask)
4733 {
4734 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4735 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4736 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4737 case sizeof(uint64_t): break;
4738 default: AssertFailedBreak();
4739 }
4740#endif
4741
4742 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4743 kIemNativeGstRegUse_ForUpdate);
4744
4745 switch (cbMask)
4746 {
4747 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4748 case sizeof(uint16_t):
4749 case sizeof(uint64_t):
4750 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
4751 break;
4752 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4753 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4754 break;
4755 default: AssertFailedBreak();
4756 }
4757
4758 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4759
4760#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4761 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4762#endif
4763
4764 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4765 return off;
4766}
4767
4768
4769/*********************************************************************************************************************************
4770* Local/Argument variable manipulation (add, sub, and, or). *
4771*********************************************************************************************************************************/
4772
4773#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
4774 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4775
4776#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
4777 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4778
4779#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
4780 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4781
4782#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
4783 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4784
4785
4786#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
4787 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
4788
4789#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
4790 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
4791
4792#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
4793 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
4794
4795/** Emits code for AND'ing a local and a constant value. */
4796DECL_INLINE_THROW(uint32_t)
4797iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4798{
4799#ifdef VBOX_STRICT
4800 switch (cbMask)
4801 {
4802 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4803 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4804 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4805 case sizeof(uint64_t): break;
4806 default: AssertFailedBreak();
4807 }
4808#endif
4809
4810 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4811 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4812
4813 if (cbMask <= sizeof(uint32_t))
4814 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
4815 else
4816 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
4817
4818 iemNativeVarRegisterRelease(pReNative, idxVar);
4819 return off;
4820}
4821
4822
4823#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
4824 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4825
4826#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
4827 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4828
4829#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
4830 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4831
4832#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
4833 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4834
4835/** Emits code for OR'ing a local and a constant value. */
4836DECL_INLINE_THROW(uint32_t)
4837iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4838{
4839#ifdef VBOX_STRICT
4840 switch (cbMask)
4841 {
4842 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4843 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4844 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4845 case sizeof(uint64_t): break;
4846 default: AssertFailedBreak();
4847 }
4848#endif
4849
4850 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4851 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4852
4853 if (cbMask <= sizeof(uint32_t))
4854 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
4855 else
4856 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
4857
4858 iemNativeVarRegisterRelease(pReNative, idxVar);
4859 return off;
4860}
4861
4862
4863#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
4864 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
4865
4866#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
4867 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
4868
4869#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
4870 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
4871
4872/** Emits code for reversing the byte order in a local value. */
4873DECL_INLINE_THROW(uint32_t)
4874iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
4875{
4876 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4877 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4878
4879 switch (cbLocal)
4880 {
4881 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
4882 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
4883 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
4884 default: AssertFailedBreak();
4885 }
4886
4887 iemNativeVarRegisterRelease(pReNative, idxVar);
4888 return off;
4889}
4890
4891
4892#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
4893 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
4894
4895#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
4896 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
4897
4898#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
4899 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
4900
4901/** Emits code for shifting left a local value. */
4902DECL_INLINE_THROW(uint32_t)
4903iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
4904{
4905#ifdef VBOX_STRICT
4906 switch (cbLocal)
4907 {
4908 case sizeof(uint8_t): Assert(cShift < 8); break;
4909 case sizeof(uint16_t): Assert(cShift < 16); break;
4910 case sizeof(uint32_t): Assert(cShift < 32); break;
4911 case sizeof(uint64_t): Assert(cShift < 64); break;
4912 default: AssertFailedBreak();
4913 }
4914#endif
4915
4916 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4917 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4918
4919 if (cbLocal <= sizeof(uint32_t))
4920 {
4921 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
4922 if (cbLocal < sizeof(uint32_t))
4923 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
4924 cbLocal == sizeof(uint16_t)
4925 ? UINT32_C(0xffff)
4926 : UINT32_C(0xff));
4927 }
4928 else
4929 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
4930
4931 iemNativeVarRegisterRelease(pReNative, idxVar);
4932 return off;
4933}
4934
4935
4936#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
4937 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
4938
4939#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
4940 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
4941
4942#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
4943 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
4944
4945/** Emits code for arithmetically shifting right a local value. */
4946DECL_INLINE_THROW(uint32_t)
4947iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
4948{
4949#ifdef VBOX_STRICT
4950 switch (cbLocal)
4951 {
4952 case sizeof(int8_t): Assert(cShift < 8); break;
4953 case sizeof(int16_t): Assert(cShift < 16); break;
4954 case sizeof(int32_t): Assert(cShift < 32); break;
4955 case sizeof(int64_t): Assert(cShift < 64); break;
4956 default: AssertFailedBreak();
4957 }
4958#endif
4959
4960 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4961 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4962
4963 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
4964 if (cbLocal == sizeof(uint8_t))
4965 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4966 else if (cbLocal == sizeof(uint16_t))
4967 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
4968
4969 if (cbLocal <= sizeof(uint32_t))
4970 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
4971 else
4972 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
4973
4974 iemNativeVarRegisterRelease(pReNative, idxVar);
4975 return off;
4976}
4977
4978
4979#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
4980 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
4981
4982#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
4983 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
4984
4985#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
4986 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
4987
4988/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
4989DECL_INLINE_THROW(uint32_t)
4990iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
4991{
4992 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
4993 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
4994 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4995 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4996
4997 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4998 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
4999
5000 /* Need to sign extend the value. */
5001 if (cbLocal <= sizeof(uint32_t))
5002 {
5003/** @todo ARM64: In case of boredom, the extended add instruction can do the
5004 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5005 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5006
5007 switch (cbLocal)
5008 {
5009 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5010 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5011 default: AssertFailed();
5012 }
5013
5014 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5015 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5016 }
5017 else
5018 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5019
5020 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5021 iemNativeVarRegisterRelease(pReNative, idxVar);
5022 return off;
5023}
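/* Worked example (illustrative): for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR with the
   16-bit local holding 0xffff (-1), the effective address must go down by one.
   The value is therefore sign extended to a full 64-bit -1 before the add;
   adding the raw 0xffff would instead advance the address by 65535. */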
5024
5025
5026
5027/*********************************************************************************************************************************
5028* EFLAGS *
5029*********************************************************************************************************************************/
5030
5031#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5032# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5033#else
5034# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5035 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5036
5037DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5038{
5039 if (fEflOutput)
5040 {
5041 PVMCPUCC const pVCpu = pReNative->pVCpu;
5042# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5043 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5044 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5045 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5046# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5047 if (fEflOutput & (a_fEfl)) \
5048 { \
5049 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5050 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5051 else \
5052 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5053 } else do { } while (0)
5054# else
5055 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5056 IEMLIVENESSBIT const LivenessClobbered =
5057 {
5058 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5059 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5060 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5061 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5062 };
5063 IEMLIVENESSBIT const LivenessDelayable =
5064 {
5065 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5066 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5067 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5068 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5069 };
5070# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5071 if (fEflOutput & (a_fEfl)) \
5072 { \
5073 if (LivenessClobbered.a_fLivenessMember) \
5074 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5075 else if (LivenessDelayable.a_fLivenessMember) \
5076 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5077 else \
5078 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5079 } else do { } while (0)
5080# endif
5081 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5082 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5083 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5084 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5085 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5086 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5087 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5088# undef CHECK_FLAG_AND_UPDATE_STATS
5089 }
5090 RT_NOREF(fEflInput);
5091}
5092#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
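/* Illustrative case: when an instruction's EFLAGS output is overwritten later
   without ever being read (say an ADD directly followed by a CMP that rewrites
   all the status flags), the liveness data classifies the write as clobbered and
   the flag calculation is counted as skippable; if the value only needs to
   survive across a potential exception or helper call it is counted as
   delayable, otherwise as required. */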
5093
5094#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5095#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5096 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
5097
5098/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5099DECL_INLINE_THROW(uint32_t)
5100iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
5101 uint32_t fEflInput, uint32_t fEflOutput)
5102{
5103 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5104 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5105 RT_NOREF(fEflInput, fEflOutput);
5106
5107#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5108# ifdef VBOX_STRICT
5109 if ( pReNative->idxCurCall != 0
5110 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
5111 {
5112 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5113 uint32_t const fBoth = fEflInput | fEflOutput;
5114# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5115 AssertMsg( !(fBoth & (a_fElfConst)) \
5116 || (!(fEflInput & (a_fElfConst)) \
5117 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5118 : !(fEflOutput & (a_fElfConst)) \
5119 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5120 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5121 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5122 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5123 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5124 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5125 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5126 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5127 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5128 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5129# undef ASSERT_ONE_EFL
5130 }
5131# endif
5132#endif
5133
5134 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5135
5136    /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
5137 * the existing shadow copy. */
5138 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
5139 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5140 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5141 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5142 return off;
5143}
5144
5145
5146
5147/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5148 * start using it with custom native code emission (inlining assembly
5149 * instruction helpers). */
5150#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5151#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5152 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5153 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
5154
5155#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5156#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5157 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5158 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
5159
5160/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5161DECL_INLINE_THROW(uint32_t)
5162iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
5163 bool fUpdateSkipping)
5164{
5165 RT_NOREF(fEflOutput);
5166 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
5167 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5168
5169#ifdef VBOX_STRICT
5170 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5171 uint32_t offFixup = off;
5172 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5173 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5174 iemNativeFixupFixedJump(pReNative, offFixup, off);
5175
5176 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5177 offFixup = off;
5178 off = iemNativeEmitJzToFixed(pReNative, off, off);
5179 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
5180 iemNativeFixupFixedJump(pReNative, offFixup, off);
5181
5182    /** @todo validate that only bits in the fEflOutput mask changed. */
5183#endif
5184
5185#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5186 if (fUpdateSkipping)
5187 {
5188 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5189 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5190 else
5191 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5192 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5193 }
5194#else
5195 RT_NOREF_PV(fUpdateSkipping);
5196#endif
5197
5198 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5199 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
5200 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5201 return off;
5202}
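/* Note on the strict checks above: they trap (brk 0x2001 / 0x2002) if the value
   being committed has the always-one reserved EFLAGS bit cleared or any of the
   reserved always-zero hardware bits set, catching corrupted flag values early. */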
5203
5204
5205typedef enum IEMNATIVEMITEFLOP
5206{
5207 kIemNativeEmitEflOp_Invalid = 0,
5208 kIemNativeEmitEflOp_Set,
5209 kIemNativeEmitEflOp_Clear,
5210 kIemNativeEmitEflOp_Flip
5211} IEMNATIVEMITEFLOP;
5212
5213#define IEM_MC_SET_EFL_BIT(a_fBit) \
5214 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Set);
5215
5216#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
5217 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Clear);
5218
5219#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
5220 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Flip);
5221
5222/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
5223DECL_INLINE_THROW(uint32_t)
5224iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
5225{
5226 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5227 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/);
5228
5229 switch (enmOp)
5230 {
5231 case kIemNativeEmitEflOp_Set:
5232 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5233 break;
5234 case kIemNativeEmitEflOp_Clear:
5235 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
5236 break;
5237 case kIemNativeEmitEflOp_Flip:
5238 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5239 break;
5240 default:
5241 AssertFailed();
5242 break;
5243 }
5244
5245 /** @todo No delayed writeback for EFLAGS right now. */
5246 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5247
5248 /* Free but don't flush the EFLAGS register. */
5249 iemNativeRegFreeTmp(pReNative, idxEflReg);
5250
5251 return off;
5252}
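/* Illustrative use (assumed typical callers): instructions like clc/stc/cmc,
   where for instance IEM_MC_FLIP_EFL_BIT(X86_EFL_CF) turns into a single XOR
   immediate on the cached EFLAGS value followed by the store above. */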
5253
5254
5255/*********************************************************************************************************************************
5256* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
5257*********************************************************************************************************************************/
5258
5259#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
5260 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
5261
5262#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
5263 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
5264
5265#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
5266 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
5267
5268
5269/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
5270 * IEM_MC_FETCH_SREG_ZX_U64. */
5271DECL_INLINE_THROW(uint32_t)
5272iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
5273{
5274 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5275 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
5276 Assert(iSReg < X86_SREG_COUNT);
5277
5278 /*
5279     * For now, we will not create a shadow copy of a selector. The rationale
5280     * is that, since we do not recompile the popping and loading of segment
5281     * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for
5282     * pushing and moving to registers, there is only a small chance that the
5283     * shadow copy will be accessed again before the register is reloaded. One
5284     * scenario would be nested calls in 16-bit code, but I doubt it's worth
5285     * the extra register pressure atm.
5286     *
5287     * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
5288     * and iemNativeVarRegisterAcquire for a load scenario. We only have the
5289     * store scenario covered at present (r160730).
5290 */
5291 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5292 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
5293 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
5294 iemNativeVarRegisterRelease(pReNative, idxDstVar);
5295 return off;
5296}
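/* Usage note (illustrative): this is what e.g. pushing a segment register or a
   'mov reg, Sreg' style operation ends up using; only the 16-bit selector value
   is loaded from CPUMCTX, the hidden base/limit/attribute parts are untouched. */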
5297
5298
5299
5300/*********************************************************************************************************************************
5301* Register references. *
5302*********************************************************************************************************************************/
5303
5304#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
5305 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
5306
5307#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
5308 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
5309
5310/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
5311DECL_INLINE_THROW(uint32_t)
5312iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
5313{
5314 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
5315 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5316 Assert(iGRegEx < 20);
5317
5318 if (iGRegEx < 16)
5319 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5320 else
5321 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
5322
5323 /* If we've delayed writing back the register value, flush it now. */
5324 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5325
5326 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5327 if (!fConst)
5328 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
5329
5330 return off;
5331}
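/* Note: iGRegEx values 16..19 are the threaded encoding of the legacy high-byte
   registers (AH/CH/DH/BH), which is why they get the GprHighByte reference kind
   while still flushing the underlying GPR selected by iGRegEx & 15. */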
5332
5333#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
5334 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
5335
5336#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
5337 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
5338
5339#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
5340 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
5341
5342#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
5343 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
5344
5345#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
5346 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
5347
5348#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
5349 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
5350
5351#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
5352 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
5353
5354#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
5355 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
5356
5357#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
5358 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
5359
5360#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
5361 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
5362
5363/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
5364DECL_INLINE_THROW(uint32_t)
5365iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
5366{
5367 Assert(iGReg < 16);
5368 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
5369 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5370
5371 /* If we've delayed writing back the register value, flush it now. */
5372 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
5373
5374 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5375 if (!fConst)
5376 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
5377
5378 return off;
5379}
5380
5381
5382#undef IEM_MC_REF_EFLAGS /* should not be used. */
5383#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
5384 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5385 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
5386
5387/** Handles IEM_MC_REF_EFLAGS. */
5388DECL_INLINE_THROW(uint32_t)
5389iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
5390{
5391 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
5392 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5393
5394#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5395 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5396
5397 /* Updating the skipping according to the outputs is a little early, but
5398 we don't have any other hooks for references atm. */
5399 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5400 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5401 else if (fEflOutput & X86_EFL_STATUS_BITS)
5402 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5403 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5404#else
5405 RT_NOREF(fEflInput, fEflOutput);
5406#endif
5407
5408 /* If we've delayed writing back the register value, flush it now. */
5409 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
5410
5411 /* If there is a shadow copy of guest EFLAGS, flush it now. */
5412 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
5413
5414 return off;
5415}
5416
5417
5418/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
5419 * different code from the threaded recompiler, maybe it would be helpful. For now
5420 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
5421#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
5422
5423
5424#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
5425 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
5426
5427#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
5428 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
5429
5430#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
5431 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
5432
5433#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
5434 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
5435
5436#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5437/* Just being paranoid here. */
5438# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
5439AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
5440AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
5441AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
5442AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
5443# endif
5444AssertCompileMemberOffset(X86XMMREG, au64, 0);
5445AssertCompileMemberOffset(X86XMMREG, au32, 0);
5446AssertCompileMemberOffset(X86XMMREG, ar64, 0);
5447AssertCompileMemberOffset(X86XMMREG, ar32, 0);
5448
5449# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
5450 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
5451# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
5452 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
5453# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
5454 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
5455# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
5456 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
5457#endif
5458
5459/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
5460DECL_INLINE_THROW(uint32_t)
5461iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
5462{
5463 Assert(iXReg < 16);
5464 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
5465 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5466
5467 /* If we've delayed writing back the register value, flush it now. */
5468 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
5469
5470#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5471 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5472 if (!fConst)
5473 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
5474#else
5475 RT_NOREF(fConst);
5476#endif
5477
5478 return off;
5479}
5480
5481
5482
5483/*********************************************************************************************************************************
5484* Effective Address Calculation *
5485*********************************************************************************************************************************/
5486#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
5487 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
5488
5489/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
5490 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
5491DECL_INLINE_THROW(uint32_t)
5492iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5493 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
5494{
5495 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5496
5497 /*
5498 * Handle the disp16 form with no registers first.
5499 *
5500 * Convert to an immediate value, as that'll delay the register allocation
5501 * and assignment till the memory access / call / whatever and we can use
5502 * a more appropriate register (or none at all).
5503 */
5504 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
5505 {
5506 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
5507 return off;
5508 }
5509
5510    /* Determine the displacement. */
5511 uint16_t u16EffAddr;
5512 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5513 {
5514 case 0: u16EffAddr = 0; break;
5515 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
5516 case 2: u16EffAddr = u16Disp; break;
5517 default: AssertFailedStmt(u16EffAddr = 0);
5518 }
5519
5520 /* Determine the registers involved. */
5521 uint8_t idxGstRegBase;
5522 uint8_t idxGstRegIndex;
5523 switch (bRm & X86_MODRM_RM_MASK)
5524 {
5525 case 0:
5526 idxGstRegBase = X86_GREG_xBX;
5527 idxGstRegIndex = X86_GREG_xSI;
5528 break;
5529 case 1:
5530 idxGstRegBase = X86_GREG_xBX;
5531 idxGstRegIndex = X86_GREG_xDI;
5532 break;
5533 case 2:
5534 idxGstRegBase = X86_GREG_xBP;
5535 idxGstRegIndex = X86_GREG_xSI;
5536 break;
5537 case 3:
5538 idxGstRegBase = X86_GREG_xBP;
5539 idxGstRegIndex = X86_GREG_xDI;
5540 break;
5541 case 4:
5542 idxGstRegBase = X86_GREG_xSI;
5543 idxGstRegIndex = UINT8_MAX;
5544 break;
5545 case 5:
5546 idxGstRegBase = X86_GREG_xDI;
5547 idxGstRegIndex = UINT8_MAX;
5548 break;
5549 case 6:
5550 idxGstRegBase = X86_GREG_xBP;
5551 idxGstRegIndex = UINT8_MAX;
5552 break;
5553#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
5554 default:
5555#endif
5556 case 7:
5557 idxGstRegBase = X86_GREG_xBX;
5558 idxGstRegIndex = UINT8_MAX;
5559 break;
5560 }
5561
5562 /*
5563 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
5564 */
5565 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5566 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5567 kIemNativeGstRegUse_ReadOnly);
5568 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
5569 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5570 kIemNativeGstRegUse_ReadOnly)
5571 : UINT8_MAX;
5572#ifdef RT_ARCH_AMD64
5573 if (idxRegIndex == UINT8_MAX)
5574 {
5575 if (u16EffAddr == 0)
5576 {
5577            /* movzx ret, base */
5578 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
5579 }
5580 else
5581 {
5582 /* lea ret32, [base64 + disp32] */
5583 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5584 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5585 if (idxRegRet >= 8 || idxRegBase >= 8)
5586 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5587 pbCodeBuf[off++] = 0x8d;
5588 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5589 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
5590 else
5591 {
5592 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
5593 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5594 }
5595 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5596 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5597 pbCodeBuf[off++] = 0;
5598 pbCodeBuf[off++] = 0;
5599 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5600
5601 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5602 }
5603 }
5604 else
5605 {
5606 /* lea ret32, [index64 + base64 (+ disp32)] */
5607 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5608 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5609 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5610 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5611 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5612 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5613 pbCodeBuf[off++] = 0x8d;
5614 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
5615 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5616 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
5617 if (bMod == X86_MOD_MEM4)
5618 {
5619 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5620 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5621 pbCodeBuf[off++] = 0;
5622 pbCodeBuf[off++] = 0;
5623 }
5624 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5625 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5626 }
5627
5628#elif defined(RT_ARCH_ARM64)
5629 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5630 if (u16EffAddr == 0)
5631 {
5632 if (idxRegIndex == UINT8_MAX)
5633 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
5634 else
5635 {
5636 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
5637 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5638 }
5639 }
5640 else
5641 {
5642 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
5643 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
5644 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
5645 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5646 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
5647 else
5648 {
5649 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
5650 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5651 }
5652 if (idxRegIndex != UINT8_MAX)
5653 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
5654 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5655 }
5656
5657#else
5658# error "port me"
5659#endif
5660
5661 if (idxRegIndex != UINT8_MAX)
5662 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5663 iemNativeRegFreeTmp(pReNative, idxRegBase);
5664 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5665 return off;
5666}
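/* Worked example (illustrative): 'mov ax, [bx+si+12h]' decodes to bRm=0x40
   (mod=1, rm=0) with u16Disp=0x12, so u16EffAddr=0x12, base=BX, index=SI, and
   the code emitted above computes (uint16_t)(BX + SI + 0x12).  The registerless
   mod=0/rm=6 disp16 form never reaches this point; it is turned into a constant
   at the top of the function. */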
5667
5668
5669#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
5670 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
5671
5672/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
5673 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
5674DECL_INLINE_THROW(uint32_t)
5675iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5676 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
5677{
5678 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5679
5680 /*
5681 * Handle the disp32 form with no registers first.
5682 *
5683 * Convert to an immediate value, as that'll delay the register allocation
5684 * and assignment till the memory access / call / whatever and we can use
5685 * a more appropriate register (or none at all).
5686 */
5687 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5688 {
5689 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
5690 return off;
5691 }
5692
5693    /* Calculate the fixed displacement (more on this below for the SIB.B=4 and SIB.B=5 cases). */
5694 uint32_t u32EffAddr = 0;
5695 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5696 {
5697 case 0: break;
5698 case 1: u32EffAddr = (int8_t)u32Disp; break;
5699 case 2: u32EffAddr = u32Disp; break;
5700 default: AssertFailed();
5701 }
5702
5703 /* Get the register (or SIB) value. */
5704 uint8_t idxGstRegBase = UINT8_MAX;
5705 uint8_t idxGstRegIndex = UINT8_MAX;
5706 uint8_t cShiftIndex = 0;
5707 switch (bRm & X86_MODRM_RM_MASK)
5708 {
5709 case 0: idxGstRegBase = X86_GREG_xAX; break;
5710 case 1: idxGstRegBase = X86_GREG_xCX; break;
5711 case 2: idxGstRegBase = X86_GREG_xDX; break;
5712 case 3: idxGstRegBase = X86_GREG_xBX; break;
5713 case 4: /* SIB */
5714 {
5715            /* index w/ scaling. */
5716 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
5717 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
5718 {
5719 case 0: idxGstRegIndex = X86_GREG_xAX; break;
5720 case 1: idxGstRegIndex = X86_GREG_xCX; break;
5721 case 2: idxGstRegIndex = X86_GREG_xDX; break;
5722 case 3: idxGstRegIndex = X86_GREG_xBX; break;
5723 case 4: cShiftIndex = 0; /*no index*/ break;
5724 case 5: idxGstRegIndex = X86_GREG_xBP; break;
5725 case 6: idxGstRegIndex = X86_GREG_xSI; break;
5726 case 7: idxGstRegIndex = X86_GREG_xDI; break;
5727 }
5728
5729 /* base */
5730 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
5731 {
5732 case 0: idxGstRegBase = X86_GREG_xAX; break;
5733 case 1: idxGstRegBase = X86_GREG_xCX; break;
5734 case 2: idxGstRegBase = X86_GREG_xDX; break;
5735 case 3: idxGstRegBase = X86_GREG_xBX; break;
5736 case 4:
5737 idxGstRegBase = X86_GREG_xSP;
5738 u32EffAddr += uSibAndRspOffset >> 8;
5739 break;
5740 case 5:
5741 if ((bRm & X86_MODRM_MOD_MASK) != 0)
5742 idxGstRegBase = X86_GREG_xBP;
5743 else
5744 {
5745 Assert(u32EffAddr == 0);
5746 u32EffAddr = u32Disp;
5747 }
5748 break;
5749 case 6: idxGstRegBase = X86_GREG_xSI; break;
5750 case 7: idxGstRegBase = X86_GREG_xDI; break;
5751 }
5752 break;
5753 }
5754 case 5: idxGstRegBase = X86_GREG_xBP; break;
5755 case 6: idxGstRegBase = X86_GREG_xSI; break;
5756 case 7: idxGstRegBase = X86_GREG_xDI; break;
5757 }
5758
5759 /*
5760 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
5761 * the start of the function.
5762 */
5763 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
5764 {
5765 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
5766 return off;
5767 }
5768
5769 /*
5770 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
5771 */
5772 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5773 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
5774 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5775 kIemNativeGstRegUse_ReadOnly);
5776 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
5777 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5778 kIemNativeGstRegUse_ReadOnly);
5779
5780 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
5781 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
5782 {
5783 idxRegBase = idxRegIndex;
5784 idxRegIndex = UINT8_MAX;
5785 }
5786
5787#ifdef RT_ARCH_AMD64
5788 if (idxRegIndex == UINT8_MAX)
5789 {
5790 if (u32EffAddr == 0)
5791 {
5792 /* mov ret, base */
5793 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5794 }
5795 else
5796 {
5797 /* lea ret32, [base64 + disp32] */
5798 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5799 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5800 if (idxRegRet >= 8 || idxRegBase >= 8)
5801 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5802 pbCodeBuf[off++] = 0x8d;
5803 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5804 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5805 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
5806 else
5807 {
5808 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5809 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5810 }
5811 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5812 if (bMod == X86_MOD_MEM4)
5813 {
5814 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5815 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5816 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5817 }
5818 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5819 }
5820 }
5821 else
5822 {
5823 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5824 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5825 if (idxRegBase == UINT8_MAX)
5826 {
5827 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
5828 if (idxRegRet >= 8 || idxRegIndex >= 8)
5829 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5830 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5831 pbCodeBuf[off++] = 0x8d;
5832 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
5833 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
5834 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5835 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5836 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5837 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5838 }
5839 else
5840 {
5841 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
5842 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5843 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5844 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5845 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5846 pbCodeBuf[off++] = 0x8d;
5847 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
5848 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5849 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5850 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
5851 if (bMod != X86_MOD_MEM0)
5852 {
5853 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5854 if (bMod == X86_MOD_MEM4)
5855 {
5856 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5857 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5858 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5859 }
5860 }
5861 }
5862 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5863 }
5864
5865#elif defined(RT_ARCH_ARM64)
5866 if (u32EffAddr == 0)
5867 {
5868 if (idxRegIndex == UINT8_MAX)
5869 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5870 else if (idxRegBase == UINT8_MAX)
5871 {
5872 if (cShiftIndex == 0)
5873 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
5874 else
5875 {
5876 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5877 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
5878 }
5879 }
5880 else
5881 {
5882 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5883 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
5884 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
5885 }
5886 }
5887 else
5888 {
5889 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
5890 {
5891 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5892 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
5893 }
5894 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
5895 {
5896 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5897 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5898 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
5899 }
5900 else
5901 {
5902 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
5903 if (idxRegBase != UINT8_MAX)
5904 {
5905 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5906 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5907 }
5908 }
5909 if (idxRegIndex != UINT8_MAX)
5910 {
5911 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5912 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
5913 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
5914 }
5915 }
5916
5917#else
5918# error "port me"
5919#endif
5920
5921 if (idxRegIndex != UINT8_MAX)
5922 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5923 if (idxRegBase != UINT8_MAX)
5924 iemNativeRegFreeTmp(pReNative, idxRegBase);
5925 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5926 return off;
5927}
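/* Worked example (illustrative): 'mov eax, [ebx+ecx*4+8]' decodes to bRm=0x44
   (mod=1, rm=4 -> SIB) with SIB=0x8b (scale=2, index=ECX, base=EBX) and a disp8
   of 8, giving u32EffAddr=8, cShiftIndex=2, base=EBX, index=ECX.  On AMD64 this
   collapses into a single 32-bit LEA with a SIB byte; on ARM64 it becomes an
   ADD (immediate) followed by an ADD with a shifted index register. */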
5928
5929
5930#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5931 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5932 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
5933
5934#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5935 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5936 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
5937
5938#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5939 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5940 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
5941
5942/**
5943 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
5944 *
5945 * @returns New off.
5946 * @param pReNative          The native recompiler state.
5947 * @param off                The current code buffer offset.
5948 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
5949 * bit 4 to REX.X. The two bits are part of the
5950 * REG sub-field, which isn't needed in this
5951 * function.
5952 * @param uSibAndRspOffset Two parts:
5953 * - The first 8 bits make up the SIB byte.
5954 * - The next 8 bits are the fixed RSP/ESP offset
5955 * in case of a pop [xSP].
5956 * @param u32Disp The displacement byte/word/dword, if any.
5957 * @param cbInstr The size of the fully decoded instruction. Used
5958 * for RIP relative addressing.
5959 * @param idxVarRet The result variable number.
5960 * @param f64Bit Whether to use a 64-bit or 32-bit address size
5961 * when calculating the address.
5962 *
5963 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
5964 */
5965DECL_INLINE_THROW(uint32_t)
5966iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
5967 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
5968{
5969 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5970
5971 /*
5972 * Special case the rip + disp32 form first.
5973 */
5974 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5975 {
5976#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5977        /* Need to take the current PC offset into account for the displacement; no need to flush here
5978         * as the PC is only accessed read-only and no branching or helper calls are involved. */
5979 u32Disp += pReNative->Core.offPc;
5980#endif
5981
5982 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5983 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
5984 kIemNativeGstRegUse_ReadOnly);
5985#ifdef RT_ARCH_AMD64
5986 if (f64Bit)
5987 {
5988 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
5989 if ((int32_t)offFinalDisp == offFinalDisp)
5990 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
5991 else
5992 {
5993 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
5994 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
5995 }
5996 }
5997 else
5998 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
5999
6000#elif defined(RT_ARCH_ARM64)
6001 if (f64Bit)
6002 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
6003 (int64_t)(int32_t)u32Disp + cbInstr);
6004 else
6005 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
6006 (int32_t)u32Disp + cbInstr);
6007
6008#else
6009# error "Port me!"
6010#endif
6011 iemNativeRegFreeTmp(pReNative, idxRegPc);
6012 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6013 return off;
6014 }
6015
6016    /* Calculate the fixed displacement (more on this below for the SIB.B=4 and SIB.B=5 cases). */
6017 int64_t i64EffAddr = 0;
6018 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6019 {
6020 case 0: break;
6021 case 1: i64EffAddr = (int8_t)u32Disp; break;
6022 case 2: i64EffAddr = (int32_t)u32Disp; break;
6023 default: AssertFailed();
6024 }
6025
6026 /* Get the register (or SIB) value. */
6027 uint8_t idxGstRegBase = UINT8_MAX;
6028 uint8_t idxGstRegIndex = UINT8_MAX;
6029 uint8_t cShiftIndex = 0;
6030 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6031 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6032 else /* SIB: */
6033 {
6034        /* index w/ scaling. */
6035 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6036 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6037 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6038 if (idxGstRegIndex == 4)
6039 {
6040 /* no index */
6041 cShiftIndex = 0;
6042 idxGstRegIndex = UINT8_MAX;
6043 }
6044
6045 /* base */
6046 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6047 if (idxGstRegBase == 4)
6048 {
6049 /* pop [rsp] hack */
6050 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6051 }
6052 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6053 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6054 {
6055 /* mod=0 and base=5 -> disp32, no base reg. */
6056 Assert(i64EffAddr == 0);
6057 i64EffAddr = (int32_t)u32Disp;
6058 idxGstRegBase = UINT8_MAX;
6059 }
6060 }
6061
6062 /*
6063 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6064 * the start of the function.
6065 */
6066 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6067 {
6068 if (f64Bit)
6069 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6070 else
6071 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6072 return off;
6073 }
6074
6075 /*
6076 * Now emit code that calculates:
6077 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6078 * or if !f64Bit:
6079 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6080 */
6081 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6082 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6083 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6084 kIemNativeGstRegUse_ReadOnly);
6085 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6086 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6087 kIemNativeGstRegUse_ReadOnly);
6088
6089 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6090 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6091 {
6092 idxRegBase = idxRegIndex;
6093 idxRegIndex = UINT8_MAX;
6094 }
6095
6096#ifdef RT_ARCH_AMD64
6097 uint8_t bFinalAdj;
6098 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6099 bFinalAdj = 0; /* likely */
6100 else
6101 {
6102 /* pop [rsp] with a problematic disp32 value. Split out the
6103 RSP offset and add it separately afterwards (bFinalAdj). */
6104 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6105 Assert(idxGstRegBase == X86_GREG_xSP);
6106 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6107 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6108 Assert(bFinalAdj != 0);
6109 i64EffAddr -= bFinalAdj;
6110 Assert((int32_t)i64EffAddr == i64EffAddr);
6111 }
6112 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6113//pReNative->pInstrBuf[off++] = 0xcc;
6114
6115 if (idxRegIndex == UINT8_MAX)
6116 {
6117 if (u32EffAddr == 0)
6118 {
6119 /* mov ret, base */
6120 if (f64Bit)
6121 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6122 else
6123 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6124 }
6125 else
6126 {
6127 /* lea ret, [base + disp32] */
6128 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6129 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6130 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6131 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6132 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6133 | (f64Bit ? X86_OP_REX_W : 0);
6134 pbCodeBuf[off++] = 0x8d;
6135 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6136 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6137 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6138 else
6139 {
6140 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6141 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6142 }
6143 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6144 if (bMod == X86_MOD_MEM4)
6145 {
6146 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6147 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6148 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6149 }
6150 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6151 }
6152 }
6153 else
6154 {
6155 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6156 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6157 if (idxRegBase == UINT8_MAX)
6158 {
6159 /* lea ret, [(index64 << cShiftIndex) + disp32] */
6160 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
6161 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6162 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6163 | (f64Bit ? X86_OP_REX_W : 0);
6164 pbCodeBuf[off++] = 0x8d;
6165 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6166 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6167 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6168 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6169 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6170 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6171 }
6172 else
6173 {
6174 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6175 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6176 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6177 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6178 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6179 | (f64Bit ? X86_OP_REX_W : 0);
6180 pbCodeBuf[off++] = 0x8d;
6181 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6182 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6183 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6184 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6185 if (bMod != X86_MOD_MEM0)
6186 {
6187 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6188 if (bMod == X86_MOD_MEM4)
6189 {
6190 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6191 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6192 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6193 }
6194 }
6195 }
6196 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6197 }
6198
6199 if (!bFinalAdj)
6200 { /* likely */ }
6201 else
6202 {
6203 Assert(f64Bit);
6204 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
6205 }
6206
6207#elif defined(RT_ARCH_ARM64)
6208 if (i64EffAddr == 0)
6209 {
6210 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6211 if (idxRegIndex == UINT8_MAX)
6212 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
6213 else if (idxRegBase != UINT8_MAX)
6214 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6215 f64Bit, false /*fSetFlags*/, cShiftIndex);
6216 else
6217 {
6218 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
6219 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
6220 }
6221 }
6222 else
6223 {
6224 if (f64Bit)
6225 { /* likely */ }
6226 else
6227 i64EffAddr = (int32_t)i64EffAddr;
6228
6229 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
6230 {
6231 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6232 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
6233 }
6234 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
6235 {
6236 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6237 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
6238 }
6239 else
6240 {
6241 if (f64Bit)
6242 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
6243 else
6244 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
6245 if (idxRegBase != UINT8_MAX)
6246 {
6247 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6248 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
6249 }
6250 }
6251 if (idxRegIndex != UINT8_MAX)
6252 {
6253 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6254 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6255 f64Bit, false /*fSetFlags*/, cShiftIndex);
6256 }
6257 }
6258
6259#else
6260# error "port me"
6261#endif
6262
6263 if (idxRegIndex != UINT8_MAX)
6264 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6265 if (idxRegBase != UINT8_MAX)
6266 iemNativeRegFreeTmp(pReNative, idxRegBase);
6267 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6268 return off;
6269}
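/* Worked example (illustrative): the RIP-relative form (mod=0, rm=5), e.g.
   'mov rax, [rip+disp32]', takes the special path at the top of the function.
   The effective address is the address of the *next* instruction plus disp32,
   which is why cbInstr is added to the guest PC shadow together with the
   displacement. */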
6270
6271
6272/*********************************************************************************************************************************
6273* Memory fetches and stores common *
6274*********************************************************************************************************************************/
6275
6276typedef enum IEMNATIVEMITMEMOP
6277{
6278 kIemNativeEmitMemOp_Store = 0,
6279 kIemNativeEmitMemOp_Fetch,
6280 kIemNativeEmitMemOp_Fetch_Zx_U16,
6281 kIemNativeEmitMemOp_Fetch_Zx_U32,
6282 kIemNativeEmitMemOp_Fetch_Zx_U64,
6283 kIemNativeEmitMemOp_Fetch_Sx_U16,
6284 kIemNativeEmitMemOp_Fetch_Sx_U32,
6285 kIemNativeEmitMemOp_Fetch_Sx_U64
6286} IEMNATIVEMITMEMOP;
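/* Illustrative mapping (assumed from the enum names and the cbMem parameter of
   the common worker below): an 8-bit fetch zero extended into a 32-bit variable
   uses kIemNativeEmitMemOp_Fetch_Zx_U32 with cbMem=1, a 32-bit fetch sign
   extended into a 64-bit variable uses kIemNativeEmitMemOp_Fetch_Sx_U64 with
   cbMem=4, while plain Fetch/Store cover the same-size loads and stores. */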
6287
6288/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
6289 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
6290 * (with iSegReg = UINT8_MAX). */
6291DECL_INLINE_THROW(uint32_t)
6292iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
6293 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAlignMaskAndCtl, IEMNATIVEMITMEMOP enmOp,
6294 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
6295{
6296 /*
6297 * Assert sanity.
6298 */
6299 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
6300 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
6301 Assert( enmOp != kIemNativeEmitMemOp_Store
6302 || pVarValue->enmKind == kIemNativeVarKind_Immediate
6303 || pVarValue->enmKind == kIemNativeVarKind_Stack);
6304 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
6305 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
6306 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
6307 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
6308 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6309 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
6310#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6311 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
6312 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
6313#else
6314 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
6315#endif
6316 Assert(!(fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
6317 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6318#ifdef VBOX_STRICT
6319 if (iSegReg == UINT8_MAX)
6320 {
6321 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6322 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6323 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6324 switch (cbMem)
6325 {
6326 case 1:
6327 Assert( pfnFunction
6328 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
6329 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6330 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6331 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6332 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6333 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
6334 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
6335 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
6336 : UINT64_C(0xc000b000a0009000) ));
6337 Assert(!fAlignMaskAndCtl);
6338 break;
6339 case 2:
6340 Assert( pfnFunction
6341 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
6342 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6343 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6344 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6345 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
6346 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
6347 : UINT64_C(0xc000b000a0009000) ));
6348 Assert(fAlignMaskAndCtl <= 1);
6349 break;
6350 case 4:
6351 Assert( pfnFunction
6352 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
6353 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6354 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6355 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
6356 : UINT64_C(0xc000b000a0009000) ));
6357 Assert(fAlignMaskAndCtl <= 3);
6358 break;
6359 case 8:
6360 Assert( pfnFunction
6361 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
6362 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
6363 : UINT64_C(0xc000b000a0009000) ));
6364 Assert(fAlignMaskAndCtl <= 7);
6365 break;
6366#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6367 case sizeof(RTUINT128U):
6368 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6369 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
6370 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6371 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
6372 || ( enmOp == kIemNativeEmitMemOp_Store
6373 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6374 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
6375 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6376 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6377 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
6378 : fAlignMaskAndCtl <= 15);
6379 break;
6380 case sizeof(RTUINT256U):
6381 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6382 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
6383 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
6384 || ( enmOp == kIemNativeEmitMemOp_Store
6385 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
6386 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
6387 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
6388 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
6389 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
6390 : fAlignMaskAndCtl <= 31);
6391 break;
6392#endif
6393 }
6394 }
6395 else
6396 {
6397 Assert(iSegReg < 6);
6398 switch (cbMem)
6399 {
6400 case 1:
6401 Assert( pfnFunction
6402 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
6403 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
6404 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6405 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6406 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6407 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
6408 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
6409 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
6410 : UINT64_C(0xc000b000a0009000) ));
6411 Assert(!fAlignMaskAndCtl);
6412 break;
6413 case 2:
6414 Assert( pfnFunction
6415 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
6416 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
6417 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6418 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6419 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
6420 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
6421 : UINT64_C(0xc000b000a0009000) ));
6422 Assert(fAlignMaskAndCtl <= 1);
6423 break;
6424 case 4:
6425 Assert( pfnFunction
6426 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
6427 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
6428 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
6429 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
6430 : UINT64_C(0xc000b000a0009000) ));
6431 Assert(fAlignMaskAndCtl <= 3);
6432 break;
6433 case 8:
6434 Assert( pfnFunction
6435 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
6436 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
6437 : UINT64_C(0xc000b000a0009000) ));
6438 Assert(fAlignMaskAndCtl <= 7);
6439 break;
6440#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6441 case sizeof(RTUINT128U):
6442 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6443 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
6444 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6445 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
6446 || ( enmOp == kIemNativeEmitMemOp_Store
6447 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6448 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
6449 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6450 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6451 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
6452 : fAlignMaskAndCtl <= 15);
6453 break;
6454 case sizeof(RTUINT256U):
6455 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6456 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
6457 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
6458 || ( enmOp == kIemNativeEmitMemOp_Store
6459 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
6460 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
6461 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
6462 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
6463 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
6464 : fAlignMaskAndCtl <= 31);
6465 break;
6466#endif
6467 }
6468 }
6469#endif
6470
6471#ifdef VBOX_STRICT
6472 /*
6473 * Check that the fExec flags we've got make sense.
6474 */
6475 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6476#endif
6477
6478 /*
6479 * To keep things simple we have to commit any pending writes first as we
6480 * may end up making calls.
6481 */
6482 /** @todo we could postpone this till we make the call and reload the
6483 * registers after returning from the call. Not sure if that's sensible or
6484 * not, though. */
6485#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6486 off = iemNativeRegFlushPendingWrites(pReNative, off);
6487#else
6488 /* The program counter is treated differently for now. */
6489 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
6490#endif
6491
6492#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6493 /*
6494 * Move/spill/flush stuff out of call-volatile registers.
6495 * This is the easy way out. We could contain this to the tlb-miss branch
6496 * by saving and restoring active stuff here.
6497 */
6498 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
6499#endif
6500
6501 /*
6502 * Define labels and allocate the result register (trying for the return
6503 * register if we can).
6504 */
6505 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6506#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6507 uint8_t idxRegValueFetch = UINT8_MAX;
6508
6509 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6510 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6511 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
6512 else
6513 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6514 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6515 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6516 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6517#else
6518 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6519 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6520 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6521 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6522#endif
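    /* Added note: preferring IEMNATIVE_CALL_RET_GREG for the fetch result above means that on the
       TlbMiss path the helper's return value already lands in the variable's register, so the move
       emitted after the call further down can be skipped in the common case. */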
6523 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
6524
6525#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6526 uint8_t idxRegValueStore = UINT8_MAX;
6527
6528 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6529 idxRegValueStore = !TlbState.fSkip
6530 && enmOp == kIemNativeEmitMemOp_Store
6531 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6532 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6533 : UINT8_MAX;
6534 else
6535 idxRegValueStore = !TlbState.fSkip
6536 && enmOp == kIemNativeEmitMemOp_Store
6537 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6538 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6539 : UINT8_MAX;
6540
6541#else
6542 uint8_t const idxRegValueStore = !TlbState.fSkip
6543 && enmOp == kIemNativeEmitMemOp_Store
6544 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6545 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6546 : UINT8_MAX;
6547#endif
6548 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
6549 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6550 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6551 : UINT32_MAX;
6552
6553 /*
6554 * Jump to the TLB lookup code.
6555 */
6556 if (!TlbState.fSkip)
6557 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6558
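    /*
     * Rough layout of the block emitted from here on (added sketch, derived from the label handling
     * below; when TlbState.fSkip is set only the call path is emitted and the labels/jumps are omitted):
     *
     *          jmp     TlbLookup
     *      TlbMiss:
     *          <update stats/PC, save volatiles, marshal arguments, call pfnFunction, restore state>
     *          jmp     TlbDone
     *      TlbLookup:
     *          <inline TLB lookup; branches back to TlbMiss on a miss>
     *          <inline store/fetch through idxRegMemResult>
     *      TlbDone:
     */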
6559 /*
6560 * TlbMiss:
6561 *
6562 * Call helper to do the fetching.
6563 * We flush all guest register shadow copies here.
6564 */
6565 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
6566
6567#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6568 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6569#else
6570 RT_NOREF(idxInstr);
6571#endif
6572
6573#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6574 if (pReNative->Core.offPc)
6575 {
6576 /*
6577 * Update the program counter but restore it at the end of the TlbMiss branch.
6578 * This should allow delaying more program counter updates for the TlbLookup and hit paths
6579 * which are hopefully much more frequent, reducing the number of memory accesses.
6580 */
6581 /* Allocate a temporary PC register. */
6582 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6583
6584 /* Perform the addition and store the result. */
6585 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6586 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6587
6588 /* Free and flush the PC register. */
6589 iemNativeRegFreeTmp(pReNative, idxPcReg);
6590 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6591 }
6592#endif
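    /* Added note: the mirrored block after the helper call below subtracts Core.offPc again, so the
       guest RIP in CPUMCTX is only materialized for the duration of the TlbMiss call. */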
6593
6594#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6595 /* Save variables in volatile registers. */
6596 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6597 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
6598 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
6599 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6600#endif
6601
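    /* Added reading of the argument marshalling below (informational only):
     *    flat      (iSegReg == UINT8_MAX): pfnFunction(pVCpu, GCPtrMem[, uValue in ARG2 for stores])
     *    segmented                       : pfnFunction(pVCpu, GCPtrMem, iSegReg[, uValue in ARG3 for stores])
     * For SIMD values the value argument is a reference to the variable's stack slot instead. */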
6602 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
6603 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
6604#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6605 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6606 {
6607 /*
6608 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
6609 *
6610 * @note A host register was assigned to the variable for the TlbLookup case above. It must not be
6611 * freed here, or the value loaded into that register will not be synced further down the road,
6612 * because the variable would no longer know it had a register assigned.
6613 *
6614 * @note For loads it is not required to sync what is in the assigned register with the stack slot
6615 * as it will be overwritten anyway.
6616 */
6617 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6618 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
6619 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
6620 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6621 }
6622 else
6623#endif
6624 if (enmOp == kIemNativeEmitMemOp_Store)
6625 {
6626 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6627 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
6628#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6629 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6630#else
6631 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
6632 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6633#endif
6634 }
6635
6636 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
6637 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
6638#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6639 fVolGregMask);
6640#else
6641 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
6642#endif
6643
6644 if (iSegReg != UINT8_MAX)
6645 {
6646 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
6647 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
6648 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
6649 }
6650
6651 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6652 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6653
6654 /* Done setting up parameters, make the call. */
6655 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6656
6657 /*
6658 * Put the result in the right register if this is a fetch.
6659 */
6660 if (enmOp != kIemNativeEmitMemOp_Store)
6661 {
6662#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6663 if ( cbMem == sizeof(RTUINT128U)
6664 || cbMem == sizeof(RTUINT256U))
6665 {
6666 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
6667
6668 /* Sync the value on the stack with the host register assigned to the variable. */
6669 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
6670 }
6671 else
6672#endif
6673 {
6674 Assert(idxRegValueFetch == pVarValue->idxReg);
6675 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
6676 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
6677 }
6678 }
6679
6680#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6681 /* Restore variables and guest shadow registers to volatile registers. */
6682 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6683 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6684#endif
6685
6686#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6687 if (pReNative->Core.offPc)
6688 {
6689 /*
6690 * Time to restore the program counter to its original value.
6691 */
6692 /* Allocate a temporary PC register. */
6693 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6694 kIemNativeGstRegUse_ForUpdate);
6695
6696 /* Restore the original value. */
6697 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6698 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6699
6700 /* Free and flush the PC register. */
6701 iemNativeRegFreeTmp(pReNative, idxPcReg);
6702 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6703 }
6704#endif
6705
6706#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6707 if (!TlbState.fSkip)
6708 {
6709 /* end of TlbMiss - Jump to the done label. */
6710 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6711 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6712
6713 /*
6714 * TlbLookup:
6715 */
6716 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl,
6717 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
6718 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
6719
6720 /*
6721 * Emit code to do the actual storing / fetching.
6722 */
6723 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
6724# ifdef IEM_WITH_TLB_STATISTICS
6725 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6726 enmOp == kIemNativeEmitMemOp_Store
6727 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
6728 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
6729# endif
6730 switch (enmOp)
6731 {
6732 case kIemNativeEmitMemOp_Store:
6733 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
6734 {
6735 switch (cbMem)
6736 {
6737 case 1:
6738 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6739 break;
6740 case 2:
6741 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6742 break;
6743 case 4:
6744 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6745 break;
6746 case 8:
6747 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6748 break;
6749#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6750 case sizeof(RTUINT128U):
6751 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6752 break;
6753 case sizeof(RTUINT256U):
6754 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6755 break;
6756#endif
6757 default:
6758 AssertFailed();
6759 }
6760 }
6761 else
6762 {
6763 switch (cbMem)
6764 {
6765 case 1:
6766 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
6767 idxRegMemResult, TlbState.idxReg1);
6768 break;
6769 case 2:
6770 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
6771 idxRegMemResult, TlbState.idxReg1);
6772 break;
6773 case 4:
6774 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
6775 idxRegMemResult, TlbState.idxReg1);
6776 break;
6777 case 8:
6778 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
6779 idxRegMemResult, TlbState.idxReg1);
6780 break;
6781 default:
6782 AssertFailed();
6783 }
6784 }
6785 break;
6786
6787 case kIemNativeEmitMemOp_Fetch:
6788 case kIemNativeEmitMemOp_Fetch_Zx_U16:
6789 case kIemNativeEmitMemOp_Fetch_Zx_U32:
6790 case kIemNativeEmitMemOp_Fetch_Zx_U64:
6791 switch (cbMem)
6792 {
6793 case 1:
6794 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6795 break;
6796 case 2:
6797 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6798 break;
6799 case 4:
6800 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6801 break;
6802 case 8:
6803 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6804 break;
6805#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6806 case sizeof(RTUINT128U):
6807 /*
6808 * No need to sync back the register with the stack, this is done by the generic variable handling
6809 * code if there is a register assigned to a variable and the stack must be accessed.
6810 */
6811 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6812 break;
6813 case sizeof(RTUINT256U):
6814 /*
6815 * No need to sync back the register with the stack, this is done by the generic variable handling
6816 * code if there is a register assigned to a variable and the stack must be accessed.
6817 */
6818 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6819 break;
6820#endif
6821 default:
6822 AssertFailed();
6823 }
6824 break;
6825
6826 case kIemNativeEmitMemOp_Fetch_Sx_U16:
6827 Assert(cbMem == 1);
6828 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6829 break;
6830
6831 case kIemNativeEmitMemOp_Fetch_Sx_U32:
6832 Assert(cbMem == 1 || cbMem == 2);
6833 if (cbMem == 1)
6834 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6835 else
6836 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6837 break;
6838
6839 case kIemNativeEmitMemOp_Fetch_Sx_U64:
6840 switch (cbMem)
6841 {
6842 case 1:
6843 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6844 break;
6845 case 2:
6846 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6847 break;
6848 case 4:
6849 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6850 break;
6851 default:
6852 AssertFailed();
6853 }
6854 break;
6855
6856 default:
6857 AssertFailed();
6858 }
6859
6860 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6861
6862 /*
6863 * TlbDone:
6864 */
6865 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6866
6867 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
6868
6869# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6870 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
6871 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6872# endif
6873 }
6874#else
6875 RT_NOREF(fAlignMaskAndCtl, idxLabelTlbMiss);
6876#endif
6877
6878 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
6879 iemNativeVarRegisterRelease(pReNative, idxVarValue);
6880 return off;
6881}
6882
6883
6884
6885/*********************************************************************************************************************************
6886* Memory fetches (IEM_MEM_FETCH_XXX). *
6887*********************************************************************************************************************************/
6888
6889/* 8-bit segmented: */
6890#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
6891 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
6892 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
6893 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6894
6895#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6896 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6897 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
6898 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6899
6900#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6901 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6902 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6903 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6904
6905#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6906 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6907 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6908 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6909
6910#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6911 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6912 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
6913 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
6914
6915#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6916 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6917 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6918 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
6919
6920#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6921 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6922 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6923 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
6924
6925/* 16-bit segmented: */
6926#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6927 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6928 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6929 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6930
6931#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6932 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6933 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6934 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
6935
6936#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6937 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6938 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6939 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6940
6941#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6942 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6943 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6944 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6945
6946#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6947 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6948 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6949 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6950
6951#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6952 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6953 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6954 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
6955
6956
6957/* 32-bit segmented: */
6958#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6959 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6960 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6961 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6962
6963#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6964 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6965 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6966 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
6967
6968#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6969 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6970 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6971 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6972
6973#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6974 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6975 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6976 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
6977
6978#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
6979 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
6980 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6981 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr
6982
6983#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
6984 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
6985 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6986 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6987
6988#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
6989 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
6990 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6991 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6992
6993AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
6994#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
6995 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
6996 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
6997 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6998
6999
7000/* 64-bit segmented: */
7001#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7002 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7003 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7004 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7005
7006AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
7007#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
7008 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
7009 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7010 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7011
7012
7013/* 8-bit flat: */
7014#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7015 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
7016 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7017 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7018
7019#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7020 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7021 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7022 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7023
7024#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7025 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7026 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7027 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7028
7029#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7030 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7031 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7032 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7033
7034#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7035 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7036 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7037 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7038
7039#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7040 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7041 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7042 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7043
7044#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7045 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7046 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7047 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7048
7049
7050/* 16-bit flat: */
7051#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7052 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7053 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7054 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7055
7056#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7057 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7058 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7059 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7060
7061#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7062 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7063 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7064 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7065
7066#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7067 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7068 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7069 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7070
7071#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7072 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7073 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7074 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7075
7076#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7077 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7078 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7079 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7080
7081/* 32-bit flat: */
7082#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7083 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7084 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7085 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7086
7087#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7088 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7089 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7090 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7091
7092#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7093 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7094 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7095 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7096
7097#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7098 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7099 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7100 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7101
7102#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7103 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7104 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7105 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr
7106
7107#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7108 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7109 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7110 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7111
7112#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7113 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
7114 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7115 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7116
7117#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7118 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
7119 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7120 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7121
7122
7123/* 64-bit flat: */
7124#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7125 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7126 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7127 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7128
7129#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7130 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
7131 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7132 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7133
7134#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7135/* 128-bit segmented: */
7136#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7137 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7138 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7139 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7140
7141#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7142 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7143 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7144 kIemNativeEmitMemOp_Fetch, \
7145 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7146
7147AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7148#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7149 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, sizeof(X86XMMREG), \
7150 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7151 kIemNativeEmitMemOp_Fetch, \
7152 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7153
7154#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7155 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7156 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7157 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7158
7159#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7160 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7161 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7162 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7163
7164
7165/* 128-bit flat: */
7166#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7167 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7168 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7169 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7170
7171#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7172 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7173 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7174 kIemNativeEmitMemOp_Fetch, \
7175 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7176
7177#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7178 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, sizeof(X86XMMREG), \
7179 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7180 kIemNativeEmitMemOp_Fetch, \
7181 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7182
7183#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7184 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7185 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7186 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7187
7188#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
7189 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
7190 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7191 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7192
7193/* 256-bit segmented: */
7194#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7195 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7196 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7197 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7198
7199#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7200 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7201 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7202 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7203
7204#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7205 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7206 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7207 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7208
7209#define IEM_MC_FETCH_MEM_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7210 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7211 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7212 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7213
7214
7215/* 256-bit flat: */
7216#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
7217 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7218 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7219 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7220
7221#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
7222 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7223 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7224 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7225
7226#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
7227 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
7228 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7229 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7230
7231#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
7232 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, \
7233 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7234 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7235
7236#endif
7237
7238
7239/*********************************************************************************************************************************
7240* Memory stores (IEM_MEM_STORE_XXX). *
7241*********************************************************************************************************************************/
7242
7243#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
7244 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
7245 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
7246 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7247
7248#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
7249 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
7250 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7251 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7252
7253#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
7254 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
7255 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7256 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7257
7258#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
7259 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
7260 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7261 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7262
7263
7264#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
7265 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
7266 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
7267 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7268
7269#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
7270 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
7271 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7272 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7273
7274#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
7275 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
7276 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7277 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7278
7279#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
7280 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
7281 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7282 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7283
7284
7285#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
7286 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7287 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7288
7289#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
7290 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7291 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7292
7293#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
7294 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7295 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7296
7297#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
7298 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7299 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7300
7301
7302#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
7303 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7304 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7305
7306#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
7307 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7308 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7309
7310#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
7311 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7312 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7313
7314#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
7315 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7316 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7317
7318/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
7319 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
7320DECL_INLINE_THROW(uint32_t)
7321iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
7322 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
7323{
7324 /*
7325 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
7326 * to do the grunt work.
7327 */
7328 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
7329 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
7330 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
7331 pfnFunction, idxInstr);
7332 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
7333 return off;
7334}
7335
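/*
 * Illustrative expansion (added sketch): a constant store such as IEM_MC_STORE_MEM_U16_CONST above
 * therefore ends up as
 *
 *   uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, sizeof(uint16_t), a_u16ConstValue);
 *   off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, a_iSeg, a_GCPtrMem,
 *                                              sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store,
 *                                              (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr);
 *   iemNativeVarFreeLocal(pReNative, idxVarConstValue);
 *
 * which lets the common worker emit the value as an immediate store on the TLB-hit path
 * (see iemNativeEmitStoreImm16ByGprEx and friends above).
 */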
7336
7337#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7338# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
7339 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7340 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7341 kIemNativeEmitMemOp_Store, \
7342 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
7343
7344# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
7345 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
7346 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7347 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
7348
7349# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
7350 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
7351 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7352 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
7353
7354# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
7355 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7356 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
7357 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7358
7359
7360# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
7361 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7362 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7363 kIemNativeEmitMemOp_Store, \
7364 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
7365
7366# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
7367 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
7368 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7369 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
7370
7371# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
7372 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
7373 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7374 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
7375
7376# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
7377 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
7378 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
7379 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7380#endif
7381
7382
7383
7384/*********************************************************************************************************************************
7385* Stack Accesses. *
7386*********************************************************************************************************************************/
7387/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
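/* Added decoding note: RT_BYTE1 = width of the pushed value in bits, RT_BYTE2 = 0 for the segmented
   variants or 32/64 for the flat ones, RT_BYTE3 = non-zero when a segment register is being pushed
   (see the RT_BYTE1/2/3 extraction in iemNativeEmitStackPush below). For instance,
   RT_MAKE_U32_FROM_U8(32, 32, 1, 0) denotes the flat 32-bit push of a segment selector. */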
7388#define IEM_MC_PUSH_U16(a_u16Value) \
7389 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7390 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
7391#define IEM_MC_PUSH_U32(a_u32Value) \
7392 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7393 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
7394#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
7395 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
7396 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
7397#define IEM_MC_PUSH_U64(a_u64Value) \
7398 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7399 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
7400
7401#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
7402 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7403 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7404#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
7405 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7406 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
7407#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
7408 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
7409 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
7410
7411#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
7412 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7413 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7414#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
7415 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7416 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
7417
7418
7419/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
7420DECL_INLINE_THROW(uint32_t)
7421iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
7422 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7423{
7424 /*
7425 * Assert sanity.
7426 */
7427 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7428 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7429#ifdef VBOX_STRICT
7430 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7431 {
7432 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7433 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7434 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7435 Assert( pfnFunction
7436 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7437 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
7438 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
7439 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7440 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
7441 : UINT64_C(0xc000b000a0009000) ));
7442 }
7443 else
7444 Assert( pfnFunction
7445 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
7446 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
7447 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
7448 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
7449 : UINT64_C(0xc000b000a0009000) ));
7450#endif
7451
7452#ifdef VBOX_STRICT
7453 /*
7454 * Check that the fExec flags we've got make sense.
7455 */
7456 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7457#endif
7458
7459 /*
7460 * To keep things simple we have to commit any pending writes first as we
7461 * may end up making calls.
7462 */
7463 /** @todo we could postpone this till we make the call and reload the
7464 * registers after returning from the call. Not sure if that's sensible or
7465 * not, though. */
7466 off = iemNativeRegFlushPendingWrites(pReNative, off);
7467
7468 /*
7469 * First we calculate the new RSP and the effective stack pointer value.
7470 * For 64-bit mode and flat 32-bit these two are the same.
7471 * (Code structure is very similar to that of PUSH)
7472 */
7473 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7474 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
7475 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
7476 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
7477 ? cbMem : sizeof(uint16_t);
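    /* Note: Outside 16-bit mode, Intel CPUs appear to write only a word for a
       segment register push and leave the rest of the stack slot untouched,
       which is why cbMemAccess is narrowed to 2 bytes in that case; the
       16-bit/real-mode case keeps the full width so the EFLAGS quirk can be
       applied in the TLB-hit code further down. */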
7478 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7479 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7480 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7481 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7482 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7483 if (cBitsFlat != 0)
7484 {
7485 Assert(idxRegEffSp == idxRegRsp);
7486 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7487 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7488 if (cBitsFlat == 64)
7489 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
7490 else
7491 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
7492 }
7493 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7494 {
7495 Assert(idxRegEffSp != idxRegRsp);
7496 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7497 kIemNativeGstRegUse_ReadOnly);
7498#ifdef RT_ARCH_AMD64
7499 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7500#else
7501 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7502#endif
7503 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7504 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7505 offFixupJumpToUseOtherBitSp = off;
7506 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7507 {
7508 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7509 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7510 }
7511 else
7512 {
7513 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7514 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7515 }
7516 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7517 }
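    /* The stack-pointer width not handled inline above is emitted out of line
       after the TLB jump below; the conditional jump recorded in
       offFixupJumpToUseOtherBitSp is fixed up to point at it, and that code
       jumps back here to SpUpdateEnd. */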
7518 /* SpUpdateEnd: */
7519 uint32_t const offLabelSpUpdateEnd = off;
7520
7521 /*
7522     * Okay, now prepare for the TLB lookup and jump to the TlbLookup code (or
7523     * to the TlbMiss code if we're skipping the lookup).
7524 */
7525 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7526 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
7527 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7528 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7529 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7530 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7531 : UINT32_MAX;
7532 uint8_t const idxRegValue = !TlbState.fSkip
7533 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7534 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
7535 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
7536 : UINT8_MAX;
7537 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7538
7539
7540 if (!TlbState.fSkip)
7541 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7542 else
7543 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
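    /* From here on the emitted code is laid out roughly as follows: the
       out-of-line alternative stack-pointer update (see above), the TlbMiss
       block with the helper call, then (unless the lookup is skipped) the
       TlbLookup block with the inline store, and finally the TlbDone label
       where the paths converge. */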
7544
7545 /*
7546 * Use16BitSp:
7547 */
7548 if (cBitsFlat == 0)
7549 {
7550#ifdef RT_ARCH_AMD64
7551 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7552#else
7553 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7554#endif
7555 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7556 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7557 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7558 else
7559 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7560 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7561 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7562 }
7563
7564 /*
7565 * TlbMiss:
7566 *
7567 * Call helper to do the pushing.
7568 */
7569 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7570
7571#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7572 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7573#else
7574 RT_NOREF(idxInstr);
7575#endif
7576
7577 /* Save variables in volatile registers. */
7578 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7579 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7580 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
7581 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
7582 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7583
7584 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
7585 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
7586 {
7587 /* Swap them using ARG0 as temp register: */
7588 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
7589 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
7590 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
7591 }
7592 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
7593 {
7594 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
7595 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
7596 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7597
7598 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
7599 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7600 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7601 }
7602 else
7603 {
7604 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
7605 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7606
7607 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
7608 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
7609 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
7610 }
7611
7612 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7613 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7614
7615 /* Done setting up parameters, make the call. */
7616 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7617
7618 /* Restore variables and guest shadow registers to volatile registers. */
7619 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7620 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7621
7622#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7623 if (!TlbState.fSkip)
7624 {
7625 /* end of TlbMiss - Jump to the done label. */
7626 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7627 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7628
7629 /*
7630 * TlbLookup:
7631 */
7632 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
7633 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7634
7635 /*
7636 * Emit code to do the actual storing / fetching.
7637 */
7638 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7639# ifdef IEM_WITH_TLB_STATISTICS
7640 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7641 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
7642# endif
7643 if (idxRegValue != UINT8_MAX)
7644 {
7645 switch (cbMemAccess)
7646 {
7647 case 2:
7648 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7649 break;
7650 case 4:
7651 if (!fIsIntelSeg)
7652 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7653 else
7654 {
7655                        /* Intel real mode segment push: the 10890XE adds the 2nd half of EFLAGS to a
7656                           PUSH FS in real mode, so we have to try to emulate that here.
7657 We borrow the now unused idxReg1 from the TLB lookup code here. */
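                        /* The dword that actually ends up on the stack is thus roughly
                           (EFLAGS & 0xffff0000 & ~RAZ) | the 16-bit selector value, relying
                           on the upper half of idxRegValue being zero (see the ASSUMES
                           note below). */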
7658 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
7659 kIemNativeGstReg_EFlags);
7660 if (idxRegEfl != UINT8_MAX)
7661 {
7662#ifdef RT_ARCH_AMD64
7663 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
7664 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7665 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7666#else
7667 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
7668 off, TlbState.idxReg1, idxRegEfl,
7669 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7670#endif
7671 iemNativeRegFreeTmp(pReNative, idxRegEfl);
7672 }
7673 else
7674 {
7675 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
7676 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
7677 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7678 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7679 }
7680 /* ASSUMES the upper half of idxRegValue is ZERO. */
7681 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
7682 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
7683 }
7684 break;
7685 case 8:
7686 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7687 break;
7688 default:
7689 AssertFailed();
7690 }
7691 }
7692 else
7693 {
7694 switch (cbMemAccess)
7695 {
7696 case 2:
7697 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7698 idxRegMemResult, TlbState.idxReg1);
7699 break;
7700 case 4:
7701 Assert(!fIsSegReg);
7702 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7703 idxRegMemResult, TlbState.idxReg1);
7704 break;
7705 case 8:
7706 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
7707 break;
7708 default:
7709 AssertFailed();
7710 }
7711 }
7712
7713 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7714 TlbState.freeRegsAndReleaseVars(pReNative);
7715
7716 /*
7717 * TlbDone:
7718 *
7719 * Commit the new RSP value.
7720 */
7721 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7722 }
7723#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
7724
7725#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7726 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
7727#endif
7728 iemNativeRegFreeTmp(pReNative, idxRegRsp);
7729 if (idxRegEffSp != idxRegRsp)
7730 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
7731
7732    /* The value variable is implicitly flushed. */
7733 if (idxRegValue != UINT8_MAX)
7734 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7735 iemNativeVarFreeLocal(pReNative, idxVarValue);
7736
7737 return off;
7738}
7739
7740
7741
7742/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
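/* Same packing as for the push macros above, except that byte 2 (the segment
   register flag) is always zero for pops. */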
7743#define IEM_MC_POP_GREG_U16(a_iGReg) \
7744 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7745 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
7746#define IEM_MC_POP_GREG_U32(a_iGReg) \
7747 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7748 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
7749#define IEM_MC_POP_GREG_U64(a_iGReg) \
7750 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7751 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
7752
7753#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
7754 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7755 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7756#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
7757 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7758 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
7759
7760#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
7761 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7762 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7763#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
7764 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7765 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
7766
7767
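/** Helper for the stack POP emitters: computes the effective 16-bit stack
 *  pointer into idxRegEffSp and advances SP (bits 15:0 of RSP only, the upper
 *  bits are preserved) by cbMem; idxRegTmp is only needed by the arm64
 *  variant. */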
7768DECL_FORCE_INLINE_THROW(uint32_t)
7769iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
7770 uint8_t idxRegTmp)
7771{
7772 /* Use16BitSp: */
7773#ifdef RT_ARCH_AMD64
7774 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7775 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
7776 RT_NOREF(idxRegTmp);
7777#else
7778 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
7779 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
7780 /* add tmp, regrsp, #cbMem */
7781 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
7782 /* and tmp, tmp, #0xffff */
7783 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
7784 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
7785    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
7786 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
7787#endif
7788 return off;
7789}
7790
7791
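/** Helper for the stack POP emitters: copies ESP into idxRegEffSp and
 *  advances ESP by cbMem using 32-bit arithmetic. */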
7792DECL_FORCE_INLINE(uint32_t)
7793iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
7794{
7795 /* Use32BitSp: */
7796 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7797 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
7798 return off;
7799}
7800
7801
7802/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
7803DECL_INLINE_THROW(uint32_t)
7804iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
7805 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7806{
7807 /*
7808 * Assert sanity.
7809 */
7810 Assert(idxGReg < 16);
7811#ifdef VBOX_STRICT
7812 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7813 {
7814 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7815 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7816 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7817 Assert( pfnFunction
7818 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7819 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
7820 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7821 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
7822 : UINT64_C(0xc000b000a0009000) ));
7823 }
7824 else
7825 Assert( pfnFunction
7826 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
7827 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
7828 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
7829 : UINT64_C(0xc000b000a0009000) ));
7830#endif
7831
7832#ifdef VBOX_STRICT
7833 /*
7834 * Check that the fExec flags we've got make sense.
7835 */
7836 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7837#endif
7838
7839 /*
7840 * To keep things simple we have to commit any pending writes first as we
7841 * may end up making calls.
7842 */
7843 off = iemNativeRegFlushPendingWrites(pReNative, off);
7844
7845 /*
7846     * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
7847 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
7848 * directly as the effective stack pointer.
7849 * (Code structure is very similar to that of PUSH)
7850 */
7851 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7852 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7853 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7854 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7855 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7856 /** @todo can do a better job picking the register here. For cbMem >= 4 this
7857 * will be the resulting register value. */
7858 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
7859
7860 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7861 if (cBitsFlat != 0)
7862 {
7863 Assert(idxRegEffSp == idxRegRsp);
7864 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7865 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7866 }
7867 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7868 {
7869 Assert(idxRegEffSp != idxRegRsp);
7870 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7871 kIemNativeGstRegUse_ReadOnly);
7872#ifdef RT_ARCH_AMD64
7873 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7874#else
7875 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7876#endif
7877 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7878 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7879 offFixupJumpToUseOtherBitSp = off;
7880 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7881 {
7882/** @todo can skip idxRegRsp updating when popping ESP. */
7883 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7884 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7885 }
7886 else
7887 {
7888 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7889 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
7890 }
7891 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7892 }
7893 /* SpUpdateEnd: */
7894 uint32_t const offLabelSpUpdateEnd = off;
7895
7896 /*
7897     * Okay, now prepare for the TLB lookup and jump to the TlbLookup code (or
7898     * to the TlbMiss code if we're skipping the lookup).
7899 */
7900 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7901 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
7902 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7903 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7904 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7905 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7906 : UINT32_MAX;
7907
7908 if (!TlbState.fSkip)
7909 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7910 else
7911 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
7912
7913 /*
7914 * Use16BitSp:
7915 */
7916 if (cBitsFlat == 0)
7917 {
7918#ifdef RT_ARCH_AMD64
7919 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7920#else
7921 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7922#endif
7923 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7924 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7925 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
7926 else
7927 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7928 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7929 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7930 }
7931
7932 /*
7933 * TlbMiss:
7934 *
7935     * Call helper to do the popping.
7936 */
7937 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7938
7939#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7940 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7941#else
7942 RT_NOREF(idxInstr);
7943#endif
7944
7945 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7946 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7947 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
7948 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7949
7950
7951 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
7952 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7953 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7954
7955 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7956 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7957
7958 /* Done setting up parameters, make the call. */
7959 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7960
7961 /* Move the return register content to idxRegMemResult. */
7962 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
7963 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
7964
7965 /* Restore variables and guest shadow registers to volatile registers. */
7966 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7967 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7968
7969#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7970 if (!TlbState.fSkip)
7971 {
7972 /* end of TlbMiss - Jump to the done label. */
7973 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7974 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7975
7976 /*
7977 * TlbLookup:
7978 */
7979 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
7980 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7981
7982 /*
7983     * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
7984 */
7985 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7986# ifdef IEM_WITH_TLB_STATISTICS
7987 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7988 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
7989# endif
7990 switch (cbMem)
7991 {
7992 case 2:
7993 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7994 break;
7995 case 4:
7996 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7997 break;
7998 case 8:
7999 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8000 break;
8001 default:
8002 AssertFailed();
8003 }
8004
8005 TlbState.freeRegsAndReleaseVars(pReNative);
8006
8007 /*
8008 * TlbDone:
8009 *
8010     * Set the new RSP value (FLAT accesses need to calculate it first) and
8011 * commit the popped register value.
8012 */
8013 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8014 }
8015#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8016
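    /* Commit the popped value: for dword/qword pops the result register simply
       becomes the new shadow copy of the destination GPR, while for word pops
       it is merged into the low 16 bits of the existing value. Popping into
       xSP is special-cased since the loaded value (or its low word) replaces
       the stack pointer instead of being written to a separate GPR. */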
8017 if (idxGReg != X86_GREG_xSP)
8018 {
8019 /* Set the register. */
8020 if (cbMem >= sizeof(uint32_t))
8021 {
8022#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8023 AssertMsg( pReNative->idxCurCall == 0
8024 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8025 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8026 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8027#endif
8028 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8029#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8030 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8031#endif
8032#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8033 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8034 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8035#endif
8036 }
8037 else
8038 {
8039 Assert(cbMem == sizeof(uint16_t));
8040 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8041 kIemNativeGstRegUse_ForUpdate);
8042 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8043#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8044 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8045#endif
8046 iemNativeRegFreeTmp(pReNative, idxRegDst);
8047 }
8048
8049 /* Complete RSP calculation for FLAT mode. */
8050 if (idxRegEffSp == idxRegRsp)
8051 {
8052 if (cBitsFlat == 64)
8053 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
8054 else
8055 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
8056 }
8057 }
8058 else
8059 {
8060        /* We're popping RSP, ESP or SP. Only the 16-bit (SP) case takes a bit of extra work, of course. */
8061 if (cbMem == sizeof(uint64_t))
8062 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8063 else if (cbMem == sizeof(uint32_t))
8064 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8065 else
8066 {
8067 if (idxRegEffSp == idxRegRsp)
8068 {
8069 if (cBitsFlat == 64)
8070 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
8071 else
8072 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
8073 }
8074 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8075 }
8076 }
8077
8078#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8079 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
8080#endif
8081
8082 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8083 if (idxRegEffSp != idxRegRsp)
8084 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8085 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8086
8087 return off;
8088}
8089
8090
8091
8092/*********************************************************************************************************************************
8093* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8094*********************************************************************************************************************************/
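/* These emitters are used in pairs by the generated MC blocks: one of the
   IEM_MC_MEM_[FLAT_]MAP_Uxx_ATOMIC/RW/WO/RO macros below produces the host
   pointer and the opaque bUnmapInfo token, and a matching
   IEM_MC_MEM_COMMIT_AND_UNMAP_XXX (defined further down) releases the mapping
   again, roughly along these lines (illustrative variable names):

       IEM_MC_MEM_MAP_U32_RW(pu32Dst, bUnmapInfo, iSegReg, GCPtrEff);
       ... read and/or modify *pu32Dst ...
       IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
 */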
8095
8096#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8097 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8098 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8099 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8100
8101#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8102 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8103 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8104 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8105
8106#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8107 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8108 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8109 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8110
8111#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8112 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8113 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8114 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8115
8116
8117#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8118 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8119 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8120 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8121
8122#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8123 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8124 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8125 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8126
8127#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8128 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8129 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8130 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8131
8132#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8133 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8134 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8135 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8136
8137#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8138 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
8139 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8140 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8141
8142
8143#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8144 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8145 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8146 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8147
8148#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8149 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8150 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8151 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8152
8153#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8154 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8155 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8156 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8157
8158#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8159 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8160 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8161 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8162
8163#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8164 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
8165 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8166 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8167
8168
8169#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8170 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8171 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8172 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8173
8174#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8175 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8176 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8177 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8178#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8179 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8180 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8181 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8182
8183#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8184 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8185 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8186 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
8187
8188#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8189 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
8190 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8191 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8192
8193
8194#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8195 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8196 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8197 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
8198
8199#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8200 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8201 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8202 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
8203
8204
8205#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8206 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8207 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8208 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
8209
8210#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8211 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8212 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8213 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
8214
8215#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8216 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8217 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8218 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
8219
8220#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8221 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8222 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8223 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
8224
8225
8226
8227#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8228 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8229 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8230 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
8231
8232#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8233 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8234 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8235 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
8236
8237#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8238 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8239 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8240 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
8241
8242#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8243 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8244 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8245 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
8246
8247
8248#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8249 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8250 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8251 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
8252
8253#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8254 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8255 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8256 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
8257
8258#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8259 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8260 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8261 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8262
8263#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8264 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8265 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8266 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
8267
8268#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
8269 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
8270 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8271 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8272
8273
8274#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8275 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8276 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8277 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
8278
8279#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8280 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8281 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8282 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
8283
8284#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8285 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8286 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8287 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8288
8289#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8290 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8291 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8292 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
8293
8294#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
8295 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
8296 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8297 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8298
8299
8300#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8301 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8302 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8303 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
8304
8305#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8306 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8307 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8308 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
8309
8310#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8311 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8312 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8313 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8314
8315#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8316 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8317 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8318 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
8319
8320#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
8321 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
8322 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8323 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8324
8325
8326#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
8327 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8328 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8329 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
8330
8331#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
8332 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8333 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8334 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
8335
8336
8337#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8338 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8339 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8340 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
8341
8342#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8343 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8344 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8345 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
8346
8347#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8348 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8349 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8350 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
8351
8352#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8353 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8354 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8355 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
8356
8357
8358DECL_INLINE_THROW(uint32_t)
8359iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
8360 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint32_t fAlignMaskAndCtl,
8361 uintptr_t pfnFunction, uint8_t idxInstr)
8362{
8363 /*
8364 * Assert sanity.
8365 */
8366 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
8367 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
8368 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
8369 && pVarMem->cbVar == sizeof(void *),
8370 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8371
8372 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8373 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8374 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
8375 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
8376 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8377
8378 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
8379 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
8380 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
8381 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
8382 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8383
8384 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
8385
8386 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
8387
8388#ifdef VBOX_STRICT
8389# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
8390 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
8391 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
8392 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
8393 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
8394# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
8395 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
8396 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
8397 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
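/* The two helpers above derive the expected helper-function variant (Atomic,
   Rw, Ro or Wo) from the access flags, so that the size switches below can
   assert that the pfnFunction supplied by the mapping macros matches the
   requested access type. */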
8398
8399 if (iSegReg == UINT8_MAX)
8400 {
8401 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8402 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8403 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8404 switch (cbMem)
8405 {
8406 case 1:
8407 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8));
8408 Assert(!fAlignMaskAndCtl);
8409 break;
8410 case 2:
8411 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16));
8412 Assert(fAlignMaskAndCtl < 2);
8413 break;
8414 case 4:
8415 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32));
8416 Assert(fAlignMaskAndCtl < 4);
8417 break;
8418 case 8:
8419 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64));
8420 Assert(fAlignMaskAndCtl < 8);
8421 break;
8422 case 10:
8423 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
8424 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
8425 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8426 Assert(fAlignMaskAndCtl < 8);
8427 break;
8428 case 16:
8429 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128));
8430 Assert(fAlignMaskAndCtl < 16);
8431 break;
8432# if 0
8433 case 32:
8434 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256));
8435 Assert(fAlignMaskAndCtl < 32);
8436 break;
8437 case 64:
8438 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512));
8439 Assert(fAlignMaskAndCtl < 64);
8440 break;
8441# endif
8442 default: AssertFailed(); break;
8443 }
8444 }
8445 else
8446 {
8447 Assert(iSegReg < 6);
8448 switch (cbMem)
8449 {
8450 case 1:
8451 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8));
8452 Assert(!fAlignMaskAndCtl);
8453 break;
8454 case 2:
8455 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16));
8456 Assert(fAlignMaskAndCtl < 2);
8457 break;
8458 case 4:
8459 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32));
8460 Assert(fAlignMaskAndCtl < 4);
8461 break;
8462 case 8:
8463 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64));
8464 Assert(fAlignMaskAndCtl < 8);
8465 break;
8466 case 10:
8467 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
8468 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
8469 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8470 Assert(fAlignMaskAndCtl < 8);
8471 break;
8472 case 16:
8473 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128));
8474 Assert(fAlignMaskAndCtl < 16);
8475 break;
8476# if 0
8477 case 32:
8478 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256));
8479 Assert(fAlignMaskAndCtl < 32);
8480 break;
8481 case 64:
8482 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512));
8483 Assert(fAlignMaskAndCtl < 64);
8484 break;
8485# endif
8486 default: AssertFailed(); break;
8487 }
8488 }
8489# undef IEM_MAP_HLP_FN
8490# undef IEM_MAP_HLP_FN_NO_AT
8491#endif
8492
8493#ifdef VBOX_STRICT
8494 /*
8495 * Check that the fExec flags we've got make sense.
8496 */
8497 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8498#endif
8499
8500 /*
8501 * To keep things simple we have to commit any pending writes first as we
8502 * may end up making calls.
8503 */
8504 off = iemNativeRegFlushPendingWrites(pReNative, off);
8505
8506#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8507 /*
8508 * Move/spill/flush stuff out of call-volatile registers.
8509 * This is the easy way out. We could contain this to the tlb-miss branch
8510 * by saving and restoring active stuff here.
8511 */
8512 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
8513 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
8514#endif
8515
8516 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
8517       while the tlb-miss code path will temporarily put it on the stack.
8518       Set the type to stack here so we don't need to do it twice below. */
8519 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
8520 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
8521 /** @todo use a tmp register from TlbState, since they'll be free after tlb
8522 * lookup is done. */
8523
8524 /*
8525 * Define labels and allocate the result register (trying for the return
8526 * register if we can).
8527 */
8528 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8529 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
8530 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
8531 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
8532 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
8533 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8534 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8535 : UINT32_MAX;
8536//off=iemNativeEmitBrk(pReNative, off, 0);
8537 /*
8538 * Jump to the TLB lookup code.
8539 */
8540 if (!TlbState.fSkip)
8541 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8542
8543 /*
8544 * TlbMiss:
8545 *
8546     * Call helper to do the mapping.
8547 * We flush all guest register shadow copies here.
8548 */
8549 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
8550
8551#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8552 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8553#else
8554 RT_NOREF(idxInstr);
8555#endif
8556
8557#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8558 /* Save variables in volatile registers. */
8559 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
8560 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8561#endif
8562
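    /* As the argument setup below shows, the mapping helpers are effectively
       invoked as pfn(pVCpu, &bUnmapInfo, GCPtrMem[, iSegReg]), the iSegReg
       argument only being present for the segmented (non-flat) variants, and
       they return the host pointer that ends up in idxRegMemResult. */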
8563 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
8564 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
8565#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8566 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
8567#else
8568 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8569#endif
8570
8571 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
8572 if (iSegReg != UINT8_MAX)
8573 {
8574 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
8575 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
8576 }
8577
8578 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
8579 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
8580 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
8581
8582 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8583 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8584
8585 /* Done setting up parameters, make the call. */
8586 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8587
8588 /*
8589 * Put the output in the right registers.
8590 */
8591 Assert(idxRegMemResult == pVarMem->idxReg);
8592 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8593 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8594
8595#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8596 /* Restore variables and guest shadow registers to volatile registers. */
8597 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8598 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8599#endif
8600
8601 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
8602 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
8603
8604#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8605 if (!TlbState.fSkip)
8606 {
8607 /* end of TlbMiss - Jump to the done label. */
8608 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8609 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8610
8611 /*
8612 * TlbLookup:
8613 */
8614 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl, fAccess,
8615 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8616# ifdef IEM_WITH_TLB_STATISTICS
8617 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
8618 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
8619# endif
8620
8621 /* [idxVarUnmapInfo] = 0; */
8622 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
8623
8624 /*
8625 * TlbDone:
8626 */
8627 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8628
8629 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
8630
8631# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8632 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
8633 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8634# endif
8635 }
8636#else
8637 RT_NOREF(fAccess, fAlignMaskAndCtl, idxLabelTlbMiss);
8638#endif
8639
8640 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8641 iemNativeVarRegisterRelease(pReNative, idxVarMem);
8642
8643 return off;
8644}
8645
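/*
 * Rough sketch of the native code laid out above (illustrative only; the
 * actual TLB lookup body is produced by iemNativeEmitTlbLookup and is merely
 * summarized here):
 *
 *          jmp     TlbLookup               ; skipped when TlbState.fSkip
 *      TlbMiss:
 *          <save variables living in volatile regs>
 *          call    pfnFunction             ; args: pVCpu, &bUnmapInfo, GCPtrMem[, iSegReg]
 *          <copy return value to idxRegMemResult, restore regs, reload bUnmapInfo>
 *          jmp     TlbDone
 *      TlbLookup:
 *          <inline TLB lookup, result in idxRegMemResult; misses branch to TlbMiss>
 *          mov     idxRegUnmapInfo, 0      ; a TLB hit needs no unmapping
 *      TlbDone:
 */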
8646
8647#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
8648 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
8649 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
8650
8651#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
8652 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
8653 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
8654
8655#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
8656 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
8657 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
8658
8659#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
8660 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
8661 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
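/*
 * Illustrative usage sketch only (not lifted from the instruction emitters):
 * a read-modify-write MC block maps the memory, updates it, and then commits
 * and unmaps via one of the macros above, roughly along these lines. The
 * IEM_MC_MEM_MAP_U32_RW name and the operand names are assumptions here.
 *
 *      IEM_MC_LOCAL(uint8_t, bUnmapInfo);
 *      IEM_MC_MEM_MAP_U32_RW(pu32Dst, bUnmapInfo, iEffSeg, GCPtrEffDst);
 *      ... update *pu32Dst ...
 *      IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
 */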
8662
8663DECL_INLINE_THROW(uint32_t)
8664iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
8665 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
8666{
8667 /*
8668 * Assert sanity.
8669 */
8670 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8671#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
8672 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8673#endif
8674 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
8675 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
8676 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
8677#ifdef VBOX_STRICT
8678 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
8679 {
8680 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
8681 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
8682 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
8683 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
8684 case IEM_ACCESS_TYPE_WRITE:
8685 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
8686 case IEM_ACCESS_TYPE_READ:
8687 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
8688 default: AssertFailed();
8689 }
8690#else
8691 RT_NOREF(fAccess);
8692#endif
8693
8694 /*
8695 * To keep things simple we have to commit any pending writes first as we
8696 * may end up making calls (there shouldn't be any at this point, so this
8697 * is just for consistency).
8698 */
8699 /** @todo we could postpone this till we make the call and reload the
8700 * registers after returning from the call. Not sure if that's sensible or
8701 * not, though. */
8702 off = iemNativeRegFlushPendingWrites(pReNative, off);
8703
8704 /*
8705 * Move/spill/flush stuff out of call-volatile registers.
8706 *
8707 * We exclude any register holding the bUnmapInfo variable, as we'll be
8708 * checking it after returning from the call and will free it afterwards.
8709 */
8710 /** @todo save+restore active registers and maybe guest shadows in miss
8711 * scenario. */
8712 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
8713 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
8714
8715 /*
8716 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
8717 * to call the unmap helper function.
8718 *
8719 * The likelihood of it being zero is higher than for the TLB hit when doing
8720 * the mapping, as a TLB miss for a well-aligned and unproblematic memory
8721 * access should also end up with a mapping that won't need special unmapping.
8722 */
8723 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
8724 * should speed up things for the pure interpreter as well when TLBs
8725 * are enabled. */
8726#ifdef RT_ARCH_AMD64
8727 if (pVarUnmapInfo->idxReg == UINT8_MAX)
8728 {
8729 /* test byte [rbp - xxx], 0ffh */
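        /* Encoding note: 0xF6 /0 ib is TEST r/m8, imm8 - the 0 passed to
           iemNativeEmitGprByBpDisp below is the /0 (TEST) ModRM.reg value and
           the trailing 0xFF byte is the imm8 operand. */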
8730 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8731 pbCodeBuf[off++] = 0xf6;
8732 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
8733 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
8734 pbCodeBuf[off++] = 0xff;
8735 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8736 }
8737 else
8738#endif
8739 {
8740 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
8741 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
8742 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
8743 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8744 }
8745 uint32_t const offJmpFixup = off;
8746 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
8747
8748 /*
8749 * Call the unmap helper function.
8750 */
8751#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
8752 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8753#else
8754 RT_NOREF(idxInstr);
8755#endif
8756
8757 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
8758 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
8759 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8760
8761 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8762 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8763
8764 /* Done setting up parameters, make the call. */
8765 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8766
8767 /* The bUnmapInfo variable is implicitly freed by these MCs. */
8768 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
8769
8770 /*
8771 * Done, just fixup the jump for the non-call case.
8772 */
8773 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
8774
8775 return off;
8776}
8777
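/*
 * Shape of the code emitted by iemNativeEmitMemCommitAndUnmap (sketch):
 *
 *          test    bUnmapInfo, 0ffh        ; register or stack slot form, see above
 *          jz      .NothingToUnmap         ; bUnmapInfo == 0 => nothing to commit/unmap
 *          mov     arg1, bUnmapInfo
 *          mov     arg0, pVCpu
 *          call    pfnFunction             ; iemNativeHlpMemCommitAndUnmap{Atomic,Rw,Wo,Ro}
 *      .NothingToUnmap:
 */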
8778
8779
8780/*********************************************************************************************************************************
8781* State and Exceptions *
8782*********************************************************************************************************************************/
8783
8784#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8785#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8786
8787#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8788#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8789#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8790
8791#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8792#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8793#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8794
8795
8796DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
8797{
8798 /** @todo this needs a lot more work later. */
8799 RT_NOREF(pReNative, fForChange);
8800 return off;
8801}
8802
8803
8804
8805/*********************************************************************************************************************************
8806* Emitters for FPU related operations. *
8807*********************************************************************************************************************************/
8808
8809#define IEM_MC_FETCH_FCW(a_u16Fcw) \
8810 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
8811
8812/** Emits code for IEM_MC_FETCH_FCW. */
8813DECL_INLINE_THROW(uint32_t)
8814iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
8815{
8816 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8817 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8818
8819 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8820
8821 /* Allocate a temporary FCW register. */
8822 /** @todo eliminate extra register */
8823 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
8824 kIemNativeGstRegUse_ReadOnly);
8825
8826 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
8827
8828 /* Free but don't flush the FCW register. */
8829 iemNativeRegFreeTmp(pReNative, idxFcwReg);
8830 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8831
8832 return off;
8833}
8834
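/*
 * Usage sketch (illustrative only; the store MC and the operand names are
 * assumptions, not taken from the instruction tables): an FNSTCW-style emitter
 * would fetch the FCW into a 16-bit local before storing it to memory:
 *
 *      IEM_MC_LOCAL(uint16_t, u16Fcw);
 *      IEM_MC_FETCH_FCW(u16Fcw);
 *      IEM_MC_STORE_MEM_U16(iEffSeg, GCPtrEffDst, u16Fcw);
 */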
8835
8836#define IEM_MC_FETCH_FSW(a_u16Fsw) \
8837 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
8838
8839/** Emits code for IEM_MC_FETCH_FSW. */
8840DECL_INLINE_THROW(uint32_t)
8841iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
8842{
8843 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8844 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8845
8846 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
8847 /* Allocate a temporary FSW register. */
8848 /** @todo eliminate extra register */
8849 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
8850 kIemNativeGstRegUse_ReadOnly);
8851
8852 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
8853
8854 /* Free but don't flush the FSW register. */
8855 iemNativeRegFreeTmp(pReNative, idxFswReg);
8856 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8857
8858 return off;
8859}
8860
8861
8862
8863#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8864
8865
8866/*********************************************************************************************************************************
8867* Emitters for SSE/AVX specific operations. *
8868*********************************************************************************************************************************/
8869
8870#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
8871 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
8872
8873/** Emits code for IEM_MC_COPY_XREG_U128. */
8874DECL_INLINE_THROW(uint32_t)
8875iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
8876{
8877 /* This is a nop if the source and destination register are the same. */
8878 if (iXRegDst != iXRegSrc)
8879 {
8880 /* Allocate destination and source register. */
8881 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
8882 kIemNativeGstSimdRegLdStSz_Low128,
8883 kIemNativeGstRegUse_ForFullWrite);
8884 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
8885 kIemNativeGstSimdRegLdStSz_Low128,
8886 kIemNativeGstRegUse_ReadOnly);
8887
8888 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
8889
8890 /* Free but don't flush the source and destination registers. */
8891 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8892 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8893 }
8894
8895 return off;
8896}
8897
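/*
 * Usage sketch (illustrative): a register-to-register MOVAPS/MOVDQA style
 * instruction would simply copy one XMM register to another, e.g.:
 *
 *      IEM_MC_COPY_XREG_U128(IEM_GET_MODRM_REG(pVCpu, bRm), IEM_GET_MODRM_RM(pVCpu, bRm));
 *
 * IEM_GET_MODRM_REG/IEM_GET_MODRM_RM are the usual ModRM helpers; the rest of
 * the surrounding MC block is not shown here.
 */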
8898
8899#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
8900 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
8901
8902/** Emits code for IEM_MC_FETCH_XREG_U128. */
8903DECL_INLINE_THROW(uint32_t)
8904iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
8905{
8906 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8907 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
8908
8909 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8910 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8911
8912 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8913
8914 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
8915
8916 /* Free but don't flush the source register. */
8917 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8918 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8919
8920 return off;
8921}
8922
8923
8924#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
8925 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
8926
8927#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
8928 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
8929
8930 /** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
8931DECL_INLINE_THROW(uint32_t)
8932iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
8933{
8934 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8935 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
8936
8937 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8938 kIemNativeGstSimdRegLdStSz_Low128,
8939 kIemNativeGstRegUse_ReadOnly);
8940
8941 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8942 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8943
8944 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
8945
8946 /* Free but don't flush the source register. */
8947 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8948 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8949
8950 return off;
8951}
8952
8953
8954#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
8955 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
8956
8957#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
8958 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
8959
8960/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
8961DECL_INLINE_THROW(uint32_t)
8962iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
8963{
8964 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8965 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
8966
8967 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8968 kIemNativeGstSimdRegLdStSz_Low128,
8969 kIemNativeGstRegUse_ReadOnly);
8970
8971 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8972 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8973
8974 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
8975
8976 /* Free but don't flush the source register. */
8977 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8978 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8979
8980 return off;
8981}
8982
8983
8984#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
8985 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
8986
8987/** Emits code for IEM_MC_FETCH_XREG_U16. */
8988DECL_INLINE_THROW(uint32_t)
8989iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
8990{
8991 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8992 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8993
8994 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8995 kIemNativeGstSimdRegLdStSz_Low128,
8996 kIemNativeGstRegUse_ReadOnly);
8997
8998 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8999 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9000
9001 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
9002
9003 /* Free but don't flush the source register. */
9004 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9005 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9006
9007 return off;
9008}
9009
9010
9011#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
9012 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
9013
9014/** Emits code for IEM_MC_FETCH_XREG_U8. */
9015DECL_INLINE_THROW(uint32_t)
9016iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
9017{
9018 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9019 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
9020
9021 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9022 kIemNativeGstSimdRegLdStSz_Low128,
9023 kIemNativeGstRegUse_ReadOnly);
9024
9025 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9026 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9027
9028 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
9029
9030 /* Free but don't flush the source register. */
9031 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9032 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9033
9034 return off;
9035}
9036
9037
9038#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
9039 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
9040
9041AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
9042#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
9043 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
9044
9045
9046/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9047DECL_INLINE_THROW(uint32_t)
9048iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9049{
9050 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9051 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9052
9053 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9054 kIemNativeGstSimdRegLdStSz_Low128,
9055 kIemNativeGstRegUse_ForFullWrite);
9056 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9057
9058 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9059
9060 /* Free but don't flush the destination register. */
9061 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9062 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9063
9064 return off;
9065}
9066
9067
9068#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
9069 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
9070
9071#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
9072 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
9073
9074#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
9075 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
9076
9077#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
9078 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
9079
9080#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
9081 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
9082
9083#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
9084 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
9085
9086 /** Emits code for IEM_MC_STORE_XREG_U64/U32/U16/U8 and IEM_MC_STORE_XREG_R32/R64. */
9087DECL_INLINE_THROW(uint32_t)
9088iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
9089 uint8_t cbLocal, uint8_t iElem)
9090{
9091 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9092 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
9093
9094#ifdef VBOX_STRICT
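    /* A 128-bit XMM register holds 2 qwords, 4 dwords, 8 words or 16 bytes,
       which is where these element index bounds come from. */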
9095 switch (cbLocal)
9096 {
9097 case sizeof(uint64_t): Assert(iElem < 2); break;
9098 case sizeof(uint32_t): Assert(iElem < 4); break;
9099 case sizeof(uint16_t): Assert(iElem < 8); break;
9100 case sizeof(uint8_t): Assert(iElem < 16); break;
9101 default: AssertFailed();
9102 }
9103#endif
9104
9105 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9106 kIemNativeGstSimdRegLdStSz_Low128,
9107 kIemNativeGstRegUse_ForUpdate);
9108 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9109
9110 switch (cbLocal)
9111 {
9112 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9113 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9114 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9115 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9116 default: AssertFailed();
9117 }
9118
9119 /* Free but don't flush the destination register. */
9120 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9121 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9122
9123 return off;
9124}
9125
9126
9127#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
9128 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
9129
9130/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
9131DECL_INLINE_THROW(uint32_t)
9132iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9133{
9134 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9135 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9136
9137 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9138 kIemNativeGstSimdRegLdStSz_Low128,
9139 kIemNativeGstRegUse_ForUpdate);
9140 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9141
9142 /* Zero the vector register first, then store the 64-bit value to the lower 64-bit. */
9143 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9144 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9145
9146 /* Free but don't flush the destination register. */
9147 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9148 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9149
9150 return off;
9151}
9152
9153
9154#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
9155 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
9156
9157/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
9158DECL_INLINE_THROW(uint32_t)
9159iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9160{
9161 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9162 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9163
9164 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9165 kIemNativeGstSimdRegLdStSz_Low128,
9166 kIemNativeGstRegUse_ForUpdate);
9167 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9168
9169 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
9170 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9171 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9172
9173 /* Free but don't flush the destination register. */
9174 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9175 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9176
9177 return off;
9178}
9179
9180
9181#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
9182 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
9183
9184/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
9185DECL_INLINE_THROW(uint32_t)
9186iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
9187 uint8_t idxSrcVar, uint8_t iDwSrc)
9188{
9189 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9190 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9191
9192 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9193 kIemNativeGstSimdRegLdStSz_Low128,
9194 kIemNativeGstRegUse_ForUpdate);
9195 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9196
9197 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
9198 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
9199
9200 /* Free but don't flush the destination register. */
9201 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9202 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9203
9204 return off;
9205}
9206
9207
9208#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9209 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9210
9211/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
9212DECL_INLINE_THROW(uint32_t)
9213iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9214{
9215 /*
9216 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
9217 * if iYRegDst gets allocated first for the full write it won't load the
9218 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
9219 * duplicated from the already allocated host register for iYRegDst containing
9220 * garbage. This will be caught by the guest register value checking in debug
9221 * builds.
9222 */
9223 if (iYRegDst != iYRegSrc)
9224 {
9225 /* Allocate destination and source register. */
9226 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9227 kIemNativeGstSimdRegLdStSz_256,
9228 kIemNativeGstRegUse_ForFullWrite);
9229 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9230 kIemNativeGstSimdRegLdStSz_Low128,
9231 kIemNativeGstRegUse_ReadOnly);
9232
9233 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9234 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9235
9236 /* Free but don't flush the source and destination registers. */
9237 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9238 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9239 }
9240 else
9241 {
9242 /* This effectively only clears the upper 128-bits of the register. */
9243 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9244 kIemNativeGstSimdRegLdStSz_High128,
9245 kIemNativeGstRegUse_ForFullWrite);
9246
9247 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9248
9249 /* Free but don't flush the destination register. */
9250 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9251 }
9252
9253 return off;
9254}
9255
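/*
 * Illustrative note: this matches the VEX.128 register-move semantics (e.g.
 * VMOVAPS xmm1, xmm2), where the result is zero-extended to VLMAX, i.e. bits
 * 255:128 of the destination YMM register are cleared.
 */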
9256
9257#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9258 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9259
9260/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
9261DECL_INLINE_THROW(uint32_t)
9262iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9263{
9264 /*
9265 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
9266 * if iYRegDst gets allocated first for the full write it won't load the
9267 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
9268 * duplicated from the already allocated host register for iYRegDst containing
9269 * garbage. This will be caught by the guest register value checking in debug
9270 * builds. The iYRegSrc == iYRegDst case would effectively only clear the bits
9271 * above 256, which only exist for ZMM registers we don't support yet, so this is just a nop.
9272 */
9273 if (iYRegDst != iYRegSrc)
9274 {
9275 /* Allocate destination and source register. */
9276 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9277 kIemNativeGstSimdRegLdStSz_256,
9278 kIemNativeGstRegUse_ReadOnly);
9279 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9280 kIemNativeGstSimdRegLdStSz_256,
9281 kIemNativeGstRegUse_ForFullWrite);
9282
9283 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9284
9285 /* Free but don't flush the source and destination registers. */
9286 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9287 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9288 }
9289
9290 return off;
9291}
9292
9293
9294#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
9295 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
9296
9297/** Emits code for IEM_MC_FETCH_YREG_U128. */
9298DECL_INLINE_THROW(uint32_t)
9299iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
9300{
9301 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9302 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9303
9304 Assert(iDQWord <= 1);
9305 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9306 iDQWord == 1
9307 ? kIemNativeGstSimdRegLdStSz_High128
9308 : kIemNativeGstSimdRegLdStSz_Low128,
9309 kIemNativeGstRegUse_ReadOnly);
9310
9311 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9312 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9313
9314 if (iDQWord == 1)
9315 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9316 else
9317 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9318
9319 /* Free but don't flush the source register. */
9320 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9321 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9322
9323 return off;
9324}
9325
9326
9327#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
9328 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
9329
9330/** Emits code for IEM_MC_FETCH_YREG_U64. */
9331DECL_INLINE_THROW(uint32_t)
9332iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
9333{
9334 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9335 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9336
9337 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9338 iQWord >= 2
9339 ? kIemNativeGstSimdRegLdStSz_High128
9340 : kIemNativeGstSimdRegLdStSz_Low128,
9341 kIemNativeGstRegUse_ReadOnly);
9342
9343 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9344 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9345
9346 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9347
9348 /* Free but don't flush the source register. */
9349 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9350 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9351
9352 return off;
9353}
9354
9355
9356#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
9357 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
9358
9359/** Emits code for IEM_MC_FETCH_YREG_U32. */
9360DECL_INLINE_THROW(uint32_t)
9361iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
9362{
9363 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9364 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9365
9366 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9367 iDWord >= 4
9368 ? kIemNativeGstSimdRegLdStSz_High128
9369 : kIemNativeGstSimdRegLdStSz_Low128,
9370 kIemNativeGstRegUse_ReadOnly);
9371
9372 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9373 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9374
9375 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9376
9377 /* Free but don't flush the source register. */
9378 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9379 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9380
9381 return off;
9382}
9383
9384
9385#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
9386 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
9387
9388/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
9389DECL_INLINE_THROW(uint32_t)
9390iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
9391{
9392 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9393 kIemNativeGstSimdRegLdStSz_High128,
9394 kIemNativeGstRegUse_ForFullWrite);
9395
9396 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9397
9398 /* Free but don't flush the register. */
9399 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9400
9401 return off;
9402}
9403
9404
9405#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
9406 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
9407
9408/** Emits code for IEM_MC_STORE_YREG_U128. */
9409DECL_INLINE_THROW(uint32_t)
9410iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
9411{
9412 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9413 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9414
9415 Assert(iDQword <= 1);
9416 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9417 iDQword == 0
9418 ? kIemNativeGstSimdRegLdStSz_Low128
9419 : kIemNativeGstSimdRegLdStSz_High128,
9420 kIemNativeGstRegUse_ForFullWrite);
9421
9422 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9423
9424 if (iDQword == 0)
9425 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9426 else
9427 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
9428
9429 /* Free but don't flush the destination register. */
9430 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9431 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9432
9433 return off;
9434}
9435
9436
9437#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9438 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9439
9440/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
9441DECL_INLINE_THROW(uint32_t)
9442iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9443{
9444 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9445 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9446
9447 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9448 kIemNativeGstSimdRegLdStSz_256,
9449 kIemNativeGstRegUse_ForFullWrite);
9450
9451 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9452
9453 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9454 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9455
9456 /* Free but don't flush the destination register. */
9457 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9458 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9459
9460 return off;
9461}
9462
9463
9464#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
9465 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
9466
9467/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
9468DECL_INLINE_THROW(uint32_t)
9469iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9470{
9471 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9472 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9473
9474 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9475 kIemNativeGstSimdRegLdStSz_256,
9476 kIemNativeGstRegUse_ForFullWrite);
9477
9478 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9479
9480 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9481 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9482
9483 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9484 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9485
9486 return off;
9487}
9488
9489
9490#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
9491 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
9492
9493/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
9494DECL_INLINE_THROW(uint32_t)
9495iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9496{
9497 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9498 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9499
9500 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9501 kIemNativeGstSimdRegLdStSz_256,
9502 kIemNativeGstRegUse_ForFullWrite);
9503
9504 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9505
9506 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9507 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9508
9509 /* Free but don't flush the destination register. */
9510 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9511 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9512
9513 return off;
9514}
9515
9516
9517#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
9518 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
9519
9520/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
9521DECL_INLINE_THROW(uint32_t)
9522iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9523{
9524 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9525 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9526
9527 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9528 kIemNativeGstSimdRegLdStSz_256,
9529 kIemNativeGstRegUse_ForFullWrite);
9530
9531 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9532
9533 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9534 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9535
9536 /* Free but don't flush the destination register. */
9537 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9538 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9539
9540 return off;
9541}
9542
9543
9544#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
9545 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
9546
9547/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
9548DECL_INLINE_THROW(uint32_t)
9549iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9550{
9551 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9552 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9553
9554 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9555 kIemNativeGstSimdRegLdStSz_256,
9556 kIemNativeGstRegUse_ForFullWrite);
9557
9558 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9559
9560 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9561 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9562
9563 /* Free but don't flush the destination register. */
9564 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9565 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9566
9567 return off;
9568}
9569
9570
9571#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
9572 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
9573
9574/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
9575DECL_INLINE_THROW(uint32_t)
9576iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9577{
9578 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9579 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9580
9581 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9582 kIemNativeGstSimdRegLdStSz_256,
9583 kIemNativeGstRegUse_ForFullWrite);
9584
9585 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9586
9587 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9588
9589 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9590 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9591
9592 return off;
9593}
9594
9595
9596#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
9597 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
9598
9599/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
9600DECL_INLINE_THROW(uint32_t)
9601iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9602{
9603 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9604 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9605
9606 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9607 kIemNativeGstSimdRegLdStSz_256,
9608 kIemNativeGstRegUse_ForFullWrite);
9609
9610 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9611
9612 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9613
9614 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9615 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9616
9617 return off;
9618}
9619
9620
9621#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9622 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9623
9624/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
9625DECL_INLINE_THROW(uint32_t)
9626iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9627{
9628 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9629 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9630
9631 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9632 kIemNativeGstSimdRegLdStSz_256,
9633 kIemNativeGstRegUse_ForFullWrite);
9634
9635 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9636
9637 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9638
9639 /* Free but don't flush the destination register. */
9640 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9641 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9642
9643 return off;
9644}
9645
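/*
 * Usage sketch (illustrative; the surrounding MC block and operand names are
 * assumptions): a VPBROADCASTD ymm, xmm/m32 style emitter would fetch the
 * 32-bit source into a local and then do:
 *
 *      IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(IEM_GET_MODRM_REG(pVCpu, bRm), u32Src);
 */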
9646
9647#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9648 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9649
9650/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
9651DECL_INLINE_THROW(uint32_t)
9652iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9653{
9654 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9655 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9656
9657 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9658 kIemNativeGstSimdRegLdStSz_256,
9659 kIemNativeGstRegUse_ForFullWrite);
9660
9661 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9662
9663 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9664
9665 /* Free but don't flush the destination register. */
9666 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9667 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9668
9669 return off;
9670}
9671
9672
9673#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9674 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9675
9676/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
9677DECL_INLINE_THROW(uint32_t)
9678iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9679{
9680 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9681 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9682
9683 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9684 kIemNativeGstSimdRegLdStSz_256,
9685 kIemNativeGstRegUse_ForFullWrite);
9686
9687 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9688
9689 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
9690
9691 /* Free but don't flush the destination register. */
9692 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9693 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9694
9695 return off;
9696}
9697
9698
9699#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9700 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9701
9702/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
9703DECL_INLINE_THROW(uint32_t)
9704iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9705{
9706 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9707 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9708
9709 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9710 kIemNativeGstSimdRegLdStSz_256,
9711 kIemNativeGstRegUse_ForFullWrite);
9712
9713 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9714
9715 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9716 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
9717
9718 /* Free but don't flush the destination register. */
9719 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9720 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9721
9722 return off;
9723}
9724
9725
9726#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9727 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9728
9729/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
9730DECL_INLINE_THROW(uint32_t)
9731iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9732{
9733 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9734 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9735
9736 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9737 kIemNativeGstSimdRegLdStSz_256,
9738 kIemNativeGstRegUse_ForFullWrite);
9739
9740 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9741
9742 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9743 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
9744
9745 /* Free but don't flush the destination register. */
9746 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9747 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9748
9749 return off;
9750}
9751
9752
9753#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
9754 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
9755
9756/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
9757DECL_INLINE_THROW(uint32_t)
9758iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
9759{
9760 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9761 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9762
9763 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9764 kIemNativeGstSimdRegLdStSz_256,
9765 kIemNativeGstRegUse_ForFullWrite);
9766 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
9767 kIemNativeGstSimdRegLdStSz_Low128,
9768 kIemNativeGstRegUse_ReadOnly);
9769 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9770
9771 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
9772 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
9773 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9774
9775 /* Free but don't flush the source and destination registers. */
9776 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
9777 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9778 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9779
9780 return off;
9781}
9782
9783
9784#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
9785 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
9786
9787/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
9788DECL_INLINE_THROW(uint32_t)
9789iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
9790{
9791 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9792 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9793
9794 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9795 kIemNativeGstSimdRegLdStSz_256,
9796 kIemNativeGstRegUse_ForFullWrite);
9797 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
9798 kIemNativeGstSimdRegLdStSz_Low128,
9799 kIemNativeGstRegUse_ReadOnly);
9800 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9801
9802 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
9803 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
9804 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9805
9806 /* Free but don't flush the source and destination registers. */
9807 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
9808 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9809 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9810
9811 return off;
9812}
9813
9814
9815#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
9816 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
9817
9818
9819/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
9820DECL_INLINE_THROW(uint32_t)
9821iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
9822{
9823 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9824 kIemNativeGstSimdRegLdStSz_Low128,
9825 kIemNativeGstRegUse_ForUpdate);
9826
9827 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
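    /* For instance (a sketch only, not wired up): bImm8Mask == 0xf covers the whole
       low 128 bits, so a single
           off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
       could replace the four separate element clears below. */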
9828 if (bImm8Mask & RT_BIT(0))
9829 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
9830 if (bImm8Mask & RT_BIT(1))
9831 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
9832 if (bImm8Mask & RT_BIT(2))
9833 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
9834 if (bImm8Mask & RT_BIT(3))
9835 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
9836
9837 /* Free but don't flush the destination register. */
9838 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9839
9840 return off;
9841}
9842
9843
9844#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
9845 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
9846
9847#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
9848 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
9849
9850/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
9851DECL_INLINE_THROW(uint32_t)
9852iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
9853{
9854 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9855 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
9856
9857 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9858 kIemNativeGstSimdRegLdStSz_256,
9859 kIemNativeGstRegUse_ReadOnly);
9860 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9861
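    /* Copy the full 256-bit guest register into the destination variable's host SIMD register. */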
9862 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
9863
9864 /* Free but don't flush the source register. */
9865 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9866 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9867
9868 return off;
9869}
9870
9871
9872#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
9873 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
9874
9875#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
9876 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
9877
9878/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
9879DECL_INLINE_THROW(uint32_t)
9880iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
9881{
9882 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9883 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9884
9885 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9886 kIemNativeGstSimdRegLdStSz_256,
9887 kIemNativeGstRegUse_ForFullWrite);
9888    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9889
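    /* Copy all 256 bits; with the current 256-bit register model there is nothing above bit 255 left to zero for VLMAX. */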
9890 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
9891
9892    /* Free but don't flush the source and destination registers. */
9893 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9894 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9895
9896 return off;
9897}
9898
9899
9900#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
9901 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
9902
9903
9904/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
9905DECL_INLINE_THROW(uint32_t)
9906iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
9907 uint8_t idxSrcVar, uint8_t iDwSrc)
9908{
9909 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9910 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9911
9912 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9913 iDwDst < 4
9914 ? kIemNativeGstSimdRegLdStSz_Low128
9915 : kIemNativeGstSimdRegLdStSz_High128,
9916 kIemNativeGstRegUse_ForUpdate);
9917    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9918 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
9919
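    /* Go through a temporary GPR: fetch the selected dword from the source variable and store it into the destination dword. */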
9920 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
9921 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
9922
9923    /* Free but don't flush the source and destination registers and the temporary GPR. */
9924 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9925 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9926 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9927
9928 return off;
9929}
9930
9931
9932#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
9933 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
9934
9935
9936/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
9937DECL_INLINE_THROW(uint32_t)
9938iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
9939 uint8_t idxSrcVar, uint8_t iQwSrc)
9940{
9941 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9942 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9943
9944 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9945 iQwDst < 2
9946 ? kIemNativeGstSimdRegLdStSz_Low128
9947 : kIemNativeGstSimdRegLdStSz_High128,
9948 kIemNativeGstRegUse_ForUpdate);
9949    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9950 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
9951
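    /* Go through a temporary GPR: fetch the selected qword from the source variable and store it into the destination qword. */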
9952 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
9953 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
9954
9955    /* Free but don't flush the source and destination registers and the temporary GPR. */
9956 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9957 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9958 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9959
9960 return off;
9961}
9962
9963
9964#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
9965 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
9966
9967
9968/** Emits code for IEM_MC_STORE_YREG_U64. */
9969DECL_INLINE_THROW(uint32_t)
9970iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
9971{
9972 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9973 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9974
9975 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9976 iQwDst < 2
9977 ? kIemNativeGstSimdRegLdStSz_Low128
9978 : kIemNativeGstSimdRegLdStSz_High128,
9979 kIemNativeGstRegUse_ForUpdate);
9980
9981 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9982
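    /* Partial update: only the selected qword is overwritten, the other lanes of the register are left untouched. */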
9983 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
9984
9985    /* Free but don't flush the source and destination registers. */
9986 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9987 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9988
9989 return off;
9990}
9991
9992
9993#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
9994 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
9995
9996/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
9997DECL_INLINE_THROW(uint32_t)
9998iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
9999{
10000 RT_NOREF(pReNative, iYReg);
10001 /** @todo Needs to be implemented when support for AVX-512 is added. */
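    /* With the current register model only the low 256 bits of each SIMD register are tracked, so there is nothing to clear here yet. */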
10002 return off;
10003}
10004
10005
10006
10007/*********************************************************************************************************************************
10008* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
10009*********************************************************************************************************************************/
10010
10011/**
10012 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
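 * The emitted code passes the guest MXCSR value (exception flags masked out) as the hidden first argument and takes the updated MXCSR back from the return register.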
10013 */
10014DECL_INLINE_THROW(uint32_t)
10015iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
10016{
10017    /* Grab the MXCSR register; it must not be call-volatile, or we would end up freeing it when setting up the call below. */
10018 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
10019 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
10020 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
10021
10022 /*
10023 * Need to do the FPU preparation.
10024 */
10025 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
10026
10027 /*
10028 * Do all the call setup and cleanup.
10029 */
10030 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
10031 false /*fFlushPendingWrites*/);
10032
10033 /*
10034 * Load the MXCSR register into the first argument and mask out the current exception flags.
10035 */
10036 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
10037 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
10038
10039 /*
10040 * Make the call.
10041 */
10042 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
10043
10044 /*
10045 * The updated MXCSR is in the return register.
10046 */
10047 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegMxCsr, IEMNATIVE_CALL_RET_GREG);
10048
10049#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
10050 /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
10051 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
10052#endif
10053 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
10054
10055 return off;
10056}
10057
10058
10059#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
10060 off = iemNativeEmitCallSseAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
10061
10062/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
10063DECL_INLINE_THROW(uint32_t)
10064iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10065{
10066 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10067 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10068 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
10069}
10070
10071
10072#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
10073 off = iemNativeEmitCallSseAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
10074
10075/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
10076DECL_INLINE_THROW(uint32_t)
10077iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl,
10078 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10079{
10080 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10081 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10082 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10083 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
10084}
10085
10086
10087/*********************************************************************************************************************************
10088* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
10089*********************************************************************************************************************************/
10090
10091#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
10092 off = iemNativeEmitCallAvxAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
10093
10094/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
10095DECL_INLINE_THROW(uint32_t)
10096iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10097{
10098 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10099 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10100 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
10101}
10102
10103
10104#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
10105 off = iemNativeEmitCallAvxAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
10106
10107/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
10108DECL_INLINE_THROW(uint32_t)
10109iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl,
10110 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10111{
10112 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10113 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10114 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10115 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
10116}
10117
10118
10119#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
10120
10121
10122/*********************************************************************************************************************************
10123* Include instruction emitters. *
10124*********************************************************************************************************************************/
10125#include "target-x86/IEMAllN8veEmit-x86.h"
10126