VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@105655

Last change on this file was 105655, checked in by vboxsync on 2024-08-13

VMM/IEM: Fix adjusting the stack pointer when recompiling a pop instruction: in 64-bit code a 0x66 prefix indicates a 16-bit pop, so the stack pointer should only increase by 2. bugref:10741
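
The fix boils down to honouring the effective operand size when advancing the stack pointer. A rough standalone sketch of that rule (illustrative only, not the recompiler's actual emitter; the function name is made up):

    /* Illustrative: how far RSP advances after a POP in 64-bit code. */
    static uint64_t examplePopAdjustRsp(uint64_t uRsp, bool fOpSizePrefix /* 0x66 seen? */)
    {
        /* The default POP operand size in 64-bit mode is 8 bytes; a 0x66 prefix
           selects a 16-bit operand, so RSP must only be advanced by 2. */
        return uRsp + (fOpSizePrefix ? 2 : 8);
    }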

1/* $Id: IEMAllN8veRecompFuncs.h 105655 2024-08-13 07:06:03Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused ones.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
82# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
83#endif
84
85
86/*********************************************************************************************************************************
87* Code emitters for flushing pending guest register writes and sanity checks *
88*********************************************************************************************************************************/
89
90#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
91# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
92DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
93{
94 /* Compare the shadow with the context value, they should match. */
95 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
96 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
97 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
98 return off;
99}
100# endif
101#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
102
103/**
104 * Flushes delayed write of a specific guest register.
105 *
106 * This must be called prior to calling CImpl functions and any helpers that use
107 * the guest state (like raising exceptions) and such.
108 *
109 * This optimization has not yet been implemented. The first target would be
110 * RIP updates, since these are the most common ones.
111 */
112DECL_INLINE_THROW(uint32_t)
113iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
114{
115#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
116 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
117#endif
118
119#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
120#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
121 if ( enmClass == kIemNativeGstRegRef_EFlags
122 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
123 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
124#else
125 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
126#endif
127
128 if ( enmClass == kIemNativeGstRegRef_Gpr
129 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
130 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
131#endif
132
133#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
134 if ( enmClass == kIemNativeGstRegRef_XReg
135 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
136 {
137 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
138 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
139 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
140
141 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
142 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
143 }
144#endif
145 RT_NOREF(pReNative, enmClass, idxReg);
146 return off;
147}
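
/**
 * Conceptual sketch (simplified, not the recompiler's real state) of the
 * delayed-writeback idea used above: a dirty bitmap over the shadowed guest
 * registers, where flushing a specific register only stores it back to the
 * guest context when its dirty bit is set.
 * @code
 *  typedef struct EXAMPLESHADOWSTATE
 *  {
 *      uint64_t bmDirty;       // one bit per shadowed guest register
 *      uint64_t aShadow[64];   // the (conceptual) shadow copies
 *      uint64_t aGstCtx[64];   // the guest register context
 *  } EXAMPLESHADOWSTATE;
 *
 *  static void exampleFlushOne(EXAMPLESHADOWSTATE *pState, unsigned idxGstReg)
 *  {
 *      if (pState->bmDirty & RT_BIT_64(idxGstReg))
 *      {
 *          pState->aGstCtx[idxGstReg] = pState->aShadow[idxGstReg];
 *          pState->bmDirty           &= ~RT_BIT_64(idxGstReg);
 *      }
 *  }
 * @endcode
 */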
148
149
150
151/*********************************************************************************************************************************
152* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
153*********************************************************************************************************************************/
154
155#undef IEM_MC_BEGIN /* unused */
156#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
157 { \
158 Assert(pReNative->Core.bmVars == 0); \
159 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
160 Assert(pReNative->Core.bmStack == 0); \
161 pReNative->fMc = (a_fMcFlags); \
162 pReNative->fCImpl = (a_fCImplFlags); \
163 pReNative->cArgsX = (a_cArgsIncludingHidden)
164
165/** We have to get to the end in recompilation mode, as otherwise we won't
166 * generate code for all the IEM_MC_IF_XXX branches. */
167#define IEM_MC_END() \
168 iemNativeVarFreeAll(pReNative); \
169 } return off
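
/**
 * Usage sketch (hypothetical MC block, not taken from the instruction tables):
 * each recompiled MC block becomes the body of a generated emitter function,
 * bracketed by these two macros and returning the updated code buffer offset.
 * @code
 *  IEM_MC_BEGIN_EX(IEM_MC_F_64BIT, 0, 0);  // fMcFlags, fCImplFlags, cArgsIncludingHidden
 *      // ... IEM_MC_XXX statements, each advancing 'off' by emitting native code ...
 *  IEM_MC_END();
 * @endcode
 * (IEM_MC_F_64BIT is shown for illustration only; the actual flags come from the decoder.)
 */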
170
171
172
173/*********************************************************************************************************************************
174* Native Emitter Support. *
175*********************************************************************************************************************************/
176
177#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
178
179#define IEM_MC_NATIVE_ELSE() } else {
180
181#define IEM_MC_NATIVE_ENDIF() } ((void)0)
182
183
184#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
185 off = a_fnEmitter(pReNative, off)
186
187#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
188 off = a_fnEmitter(pReNative, off, (a0))
189
190#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1))
192
193#define IEM_MC_NATIVE_EMIT_2_EX(a_fnEmitter, a0, a1) \
194 off = a_fnEmitter(pReNative, off, pCallEntry->idxInstr, (a0), (a1))
195
196#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
197 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
198
199#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
200 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
201
202#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
203 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
204
205#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
206 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
207
208#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
209 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
210
211#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
212 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
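
/**
 * Usage sketch (hypothetical, not from the instruction tables): an MC block can
 * provide a native emitter for selected hosts and keep the generic IEM_MC_XXX
 * statements as the fallback for everything else. The host selector values and
 * the emitter name below are illustrative assumptions.
 * @code
 *  IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
 *      IEM_MC_NATIVE_EMIT_2(iemNativeEmit_example_r_r, idxVarDst, idxVarSrc); // hypothetical emitter
 *  IEM_MC_NATIVE_ELSE()
 *      // ... generic IEM_MC_XXX fallback statements ...
 *  IEM_MC_NATIVE_ENDIF();
 * @endcode
 */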
213
214
215#ifndef RT_ARCH_AMD64
216# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
217#else
218/** @note This is a naive approach that ASSUMES that the register isn't
219 * allocated, so it only works safely for the first allocation(s) in
220 * an MC block. */
221# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
222 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
223
224DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off);
225
226DECL_INLINE_THROW(uint32_t)
227iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
228{
229 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
230 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
231 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
232
233# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
234 /* Must flush the register if it holds pending writes. */
235 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
236 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
237 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
238# endif
239
240 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off);
241 return off;
242}
243
244#endif /* RT_ARCH_AMD64 */
245
246
247
248/*********************************************************************************************************************************
249* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX)                                             *
250*********************************************************************************************************************************/
251
252#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
253 pReNative->fMc = 0; \
254 pReNative->fCImpl = (a_fFlags); \
255 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
256 a_cbInstr) /** @todo not used ... */
257
258
259#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
260 pReNative->fMc = 0; \
261 pReNative->fCImpl = (a_fFlags); \
262 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
263
264DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
265 uint8_t idxInstr, uint64_t a_fGstShwFlush,
266 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
267{
268 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
269}
270
271
272#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
273 pReNative->fMc = 0; \
274 pReNative->fCImpl = (a_fFlags); \
275 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
276 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
277
278DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
279 uint8_t idxInstr, uint64_t a_fGstShwFlush,
280 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
281{
282 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
283}
284
285
286#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
287 pReNative->fMc = 0; \
288 pReNative->fCImpl = (a_fFlags); \
289 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
290 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
291
292DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
293 uint8_t idxInstr, uint64_t a_fGstShwFlush,
294 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
295 uint64_t uArg2)
296{
297 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
298}
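
/**
 * Usage sketch (hypothetical, not from the instruction tables): an instruction
 * handled entirely by a C implementation is recompiled as a plain call to that
 * worker, e.g.:
 * @code
 *  // The flags, shadow-flush mask and worker are supplied by the decoder; the
 *  // names below are illustrative only.
 *  IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(cbInstr, IEM_CIMPL_F_MODE, RT_BIT_64(kIemNativeGstReg_EFlags),
 *                                       iemCImpl_ExampleWorker, u16Imm);
 * @endcode
 */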
299
300
301
302/*********************************************************************************************************************************
303* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
304*********************************************************************************************************************************/
305
306/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
307 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
308DECL_INLINE_THROW(uint32_t)
309iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
310{
311 /*
312 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
313 * return with special status code and make the execution loop deal with
314 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
315 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
316 * could continue w/o interruption, it probably will drop into the
317 * debugger, so it is not worth the effort of trying to service it here and we
318 * just lump it in with the handling of the others.
319 *
320 * To simplify the code and the register state management even more (wrt
321 * immediate in AND operation), we always update the flags and skip the
322 * conditional jump associated with the extra check.
323 */
324 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
325 <= UINT32_MAX);
326#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
327 AssertMsg( pReNative->idxCurCall == 0
328 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
329 IEMLIVENESSBIT_IDX_EFL_OTHER)),
330 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
331 IEMLIVENESSBIT_IDX_EFL_OTHER)));
332#endif
333
334 /*
335 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
336 * any pending register writes must be flushed.
337 */
338 off = iemNativeRegFlushPendingWrites(pReNative, off);
339
340 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
341 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
342 true /*fSkipLivenessAssert*/);
343 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxEflReg,
344 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
345 kIemNativeLabelType_ReturnWithFlags);
346 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
347 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
348
349 /* Free but don't flush the EFLAGS register. */
350 iemNativeRegFreeTmp(pReNative, idxEflReg);
351
352 return off;
353}
354
355
356/** Helper for iemNativeEmitFinishInstructionWithStatus. */
357DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
358{
359 unsigned const offOpcodes = pCallEntry->offOpcode;
360 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
361 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
362 {
363 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
364 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
365 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
366 }
367 AssertFailedReturn(NIL_RTGCPHYS);
368}
369
370
371/** The VINF_SUCCESS dummy. */
372template<int const a_rcNormal, bool const a_fIsJump>
373DECL_FORCE_INLINE_THROW(uint32_t)
374iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
375 int32_t const offJump)
376{
377 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
378 if (a_rcNormal != VINF_SUCCESS)
379 {
380#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
381 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
382#else
383 RT_NOREF_PV(pCallEntry);
384#endif
385
386 /* As this code returns from the TB, any pending register writes must be flushed. */
387 off = iemNativeRegFlushPendingWrites(pReNative, off);
388
389 /*
390 * Use the lookup table for getting to the next TB quickly.
391 * Note! In this code path there can only be one entry at present.
392 */
393 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
394 PCIEMTB const pTbOrg = pReNative->pTbOrg;
395 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
396 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
397
398#if 0
399 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
400 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
401 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
402 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
403 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
404
405 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
406
407#else
408 /* Load the index as argument #1 for the helper call at the given label. */
409 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
410
411 /*
412 * Figure out the physical address of the current instruction and see
413 * whether the next instruction we're about to execute is in the same
414 * page so we can optimistically skip TLB loading.
415 *
416 * - This is safe for all cases in FLAT mode.
417 * - In segmented modes it is complicated, given that a negative
418 * jump may underflow EIP and a forward jump may overflow or run into
419 * CS.LIM, triggering a #GP. The only thing we can get away with
420 * now at compile time is forward jumps w/o CS.LIM checks, since the
421 * lack of CS.LIM checks means we're good for the entire physical page
422 * we're executing on and another 15 bytes before we run into CS.LIM.
423 */
424 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
425# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
426 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
427# endif
428 )
429 {
430 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
431 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
432 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
433 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
434
435 {
436 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
437 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
438
439 /* Load the key lookup flags into the 2nd argument for the helper call.
440 - This is safe wrt CS limit checking since we're only here for FLAT modes.
441 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
442 interrupt shadow.
443 - The NMI inhibiting is more questionable, though... */
444 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
445 * Should we copy it into fExec to simplify this? OTOH, it's just a
446 * couple of extra instructions if EFLAGS are already in a register. */
447 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
448 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
449
450 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
451 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookup);
452 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithIrq);
453 }
454 }
455 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
456 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlb);
457 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq);
458#endif
459 }
460 return off;
461}
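
/**
 * Standalone sketch (not the recompiler's actual code) of the same-guest-page
 * test performed above, assuming 4 KiB guest pages:
 * @code
 *  static bool exampleIsNextInsnOnSamePage(uint64_t GCPhysPcCurrent, uint8_t cbOpcode, int64_t offJump)
 *  {
 *      uint64_t const GCPhysPcNext = GCPhysPcCurrent + cbOpcode + offJump;
 *      return (GCPhysPcNext >> 12) == (GCPhysPcCurrent >> 12)     // same physical page
 *          && 4096 - (GCPhysPcCurrent & 0xfff) >= cbOpcode;       // current instruction fits on it
 *  }
 * @endcode
 */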
462
463
464#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
465 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
466 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
467
468#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
469 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
470 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
471 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
472
473/** Same as iemRegAddToRip64AndFinishingNoFlags. */
474DECL_INLINE_THROW(uint32_t)
475iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
476{
477#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
478# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
479 if (!pReNative->Core.offPc)
480 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
481# endif
482
483 /* Allocate a temporary PC register. */
484 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
485
486 /* Perform the addition and store the result. */
487 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
488 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
489
490 /* Free but don't flush the PC register. */
491 iemNativeRegFreeTmp(pReNative, idxPcReg);
492#endif
493
494#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
495 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
496
497 pReNative->Core.offPc += cbInstr;
498# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
499 off = iemNativePcAdjustCheck(pReNative, off);
500# endif
501 if (pReNative->cCondDepth)
502 off = iemNativeEmitPcWriteback(pReNative, off);
503 else
504 pReNative->Core.cInstrPcUpdateSkipped++;
505#endif
506
507 return off;
508}
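
/**
 * Conceptual sketch (simplified, not the real recompiler state) of what
 * IEMNATIVE_WITH_DELAYED_PC_UPDATING buys us: instead of storing RIP after
 * every instruction, the instruction lengths are accumulated in Core.offPc and
 * written back in one go when an up-to-date RIP is actually needed.
 * @code
 *  typedef struct EXAMPLEPCSTATE { uint64_t uGstRip; uint64_t offPc; } EXAMPLEPCSTATE;
 *
 *  static void exampleAdvancePc(EXAMPLEPCSTATE *pState, uint8_t cbInstr)
 *  {
 *      pState->offPc += cbInstr;           // no guest context store emitted here
 *  }
 *
 *  static void exampleFlushPc(EXAMPLEPCSTATE *pState)
 *  {
 *      pState->uGstRip += pState->offPc;   // single combined writeback
 *      pState->offPc    = 0;
 *  }
 * @endcode
 */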
509
510
511#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
512 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
513 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
514
515#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
516 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
517 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
518 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
519
520/** Same as iemRegAddToEip32AndFinishingNoFlags. */
521DECL_INLINE_THROW(uint32_t)
522iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
523{
524#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
525# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
526 if (!pReNative->Core.offPc)
527 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
528# endif
529
530 /* Allocate a temporary PC register. */
531 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
532
533 /* Perform the addition and store the result. */
534 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
535 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
536
537 /* Free but don't flush the PC register. */
538 iemNativeRegFreeTmp(pReNative, idxPcReg);
539#endif
540
541#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
542 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
543
544 pReNative->Core.offPc += cbInstr;
545# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
546 off = iemNativePcAdjustCheck(pReNative, off);
547# endif
548 if (pReNative->cCondDepth)
549 off = iemNativeEmitPcWriteback(pReNative, off);
550 else
551 pReNative->Core.cInstrPcUpdateSkipped++;
552#endif
553
554 return off;
555}
556
557
558#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
559 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
560 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
561
562#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
563 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
564 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
565 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
566
567/** Same as iemRegAddToIp16AndFinishingNoFlags. */
568DECL_INLINE_THROW(uint32_t)
569iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
570{
571#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
572# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
573 if (!pReNative->Core.offPc)
574 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
575# endif
576
577 /* Allocate a temporary PC register. */
578 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
579
580 /* Perform the addition and store the result. */
581 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
582 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
583 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
584
585 /* Free but don't flush the PC register. */
586 iemNativeRegFreeTmp(pReNative, idxPcReg);
587#endif
588
589#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
590 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
591
592 pReNative->Core.offPc += cbInstr;
593# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
594 off = iemNativePcAdjustCheck(pReNative, off);
595# endif
596 if (pReNative->cCondDepth)
597 off = iemNativeEmitPcWriteback(pReNative, off);
598 else
599 pReNative->Core.cInstrPcUpdateSkipped++;
600#endif
601
602 return off;
603}
604
605
606
607/*********************************************************************************************************************************
608* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
609*********************************************************************************************************************************/
610
611#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
612 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
613 (a_enmEffOpSize), pCallEntry->idxInstr); \
614 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
615
616#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
617 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
618 (a_enmEffOpSize), pCallEntry->idxInstr); \
619 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
620 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
621
622#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
623 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
624 IEMMODE_16BIT, pCallEntry->idxInstr); \
625 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
626
627#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
628 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
629 IEMMODE_16BIT, pCallEntry->idxInstr); \
630 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
631 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
632
633#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
634 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
635 IEMMODE_64BIT, pCallEntry->idxInstr); \
636 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
637
638#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
639 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
640 IEMMODE_64BIT, pCallEntry->idxInstr); \
641 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
642 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
643
644/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
645 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
646 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
647DECL_INLINE_THROW(uint32_t)
648iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
649 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
650{
651 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
652
653 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
654 off = iemNativeRegFlushPendingWrites(pReNative, off);
655
656#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
657 Assert(pReNative->Core.offPc == 0);
658
659 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
660#endif
661
662 /* Allocate a temporary PC register. */
663 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
664
665 /* Perform the addition. */
666 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
667
668 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
669 {
670 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
671 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
672 }
673 else
674 {
675 /* Just truncate the result to 16-bit IP. */
676 Assert(enmEffOpSize == IEMMODE_16BIT);
677 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
678 }
679 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
680
681 /* Free but don't flush the PC register. */
682 iemNativeRegFreeTmp(pReNative, idxPcReg);
683
684 return off;
685}
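
/**
 * Standalone sketch (not the emitted native code) of the canonical-address test
 * that iemNativeEmitCheckGprCanonicalMaybeRaiseGp0 implements, assuming 48-bit
 * linear addresses: bits 63:47 must all equal bit 47.
 * @code
 *  static bool exampleIsCanonical(uint64_t uAddr)
 *  {
 *      return (uint64_t)((int64_t)(uAddr << 16) >> 16) == uAddr;
 *  }
 * @endcode
 */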
686
687
688#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
689 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
690 (a_enmEffOpSize), pCallEntry->idxInstr); \
691 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
692
693#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
694 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
695 (a_enmEffOpSize), pCallEntry->idxInstr); \
696 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
697 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
698
699#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
700 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
701 IEMMODE_16BIT, pCallEntry->idxInstr); \
702 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
703
704#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
705 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
706 IEMMODE_16BIT, pCallEntry->idxInstr); \
707 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
708 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
709
710#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
711 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
712 IEMMODE_32BIT, pCallEntry->idxInstr); \
713 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
714
715#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
716 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
717 IEMMODE_32BIT, pCallEntry->idxInstr); \
718 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
719 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
720
721/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
722 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
723 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
724DECL_INLINE_THROW(uint32_t)
725iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
726 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
727{
728 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
729
730 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
731 off = iemNativeRegFlushPendingWrites(pReNative, off);
732
733#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
734 Assert(pReNative->Core.offPc == 0);
735
736 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
737#endif
738
739 /* Allocate a temporary PC register. */
740 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
741
742 /* Perform the addition. */
743 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
744
745 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
746 if (enmEffOpSize == IEMMODE_16BIT)
747 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
748
749 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
750/** @todo we can skip this in 32-bit FLAT mode. */
751 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
752
753 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
754
755 /* Free but don't flush the PC register. */
756 iemNativeRegFreeTmp(pReNative, idxPcReg);
757
758 return off;
759}
760
761
762#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
763 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
764 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
765
766#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
767 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
768 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
769 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
770
771#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
772 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
773 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
774
775#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
776 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
777 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
778 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
779
780#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
781 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
782 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
783
784#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
785 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
786 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
787 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
788
789/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
790DECL_INLINE_THROW(uint32_t)
791iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
792 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
793{
794 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
795 off = iemNativeRegFlushPendingWrites(pReNative, off);
796
797#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
798 Assert(pReNative->Core.offPc == 0);
799
800 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
801#endif
802
803 /* Allocate a temporary PC register. */
804 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
805
806 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
807 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
808 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
809 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
810 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
811
812 /* Free but don't flush the PC register. */
813 iemNativeRegFreeTmp(pReNative, idxPcReg);
814
815 return off;
816}
817
818
819
820/*********************************************************************************************************************************
821* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH).                                    *
822*********************************************************************************************************************************/
823
824/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
825#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
826 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
827
828/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
829#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
830 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
831
832/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
833#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
834 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
835
836/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
837 * clears flags. */
838#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
839 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
840 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
841
842/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
843 * clears flags. */
844#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
845 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
846 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
847
848/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
849 * clears flags. */
850#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
851 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
852 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
853
854#undef IEM_MC_SET_RIP_U16_AND_FINISH
855
856
857/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
858#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
859 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
860
861/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
862#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
863 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
864
865/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
866 * clears flags. */
867#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
868 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
869 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
870
871/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
872 * and clears flags. */
873#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
874 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
875 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
876
877#undef IEM_MC_SET_RIP_U32_AND_FINISH
878
879
880/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
881#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
882 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
883
884/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
885 * and clears flags. */
886#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
887 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
888 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
889
890#undef IEM_MC_SET_RIP_U64_AND_FINISH
891
892
893/** Same as iemRegRipJumpU16AndFinishNoFlags,
894 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
895DECL_INLINE_THROW(uint32_t)
896iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
897 uint8_t idxInstr, uint8_t cbVar)
898{
899 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
900 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
901
902 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
903 off = iemNativeRegFlushPendingWrites(pReNative, off);
904
905#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
906 Assert(pReNative->Core.offPc == 0);
907
908 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
909#endif
910
911 /* Get a register with the new PC loaded from idxVarPc.
912 Note! This ASSUMES that the high bits of the GPR are zeroed. */
913 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
914
915 /* Check limit (may #GP(0) + exit TB). */
916 if (!f64Bit)
917/** @todo we can skip this test in FLAT 32-bit mode. */
918 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
919 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
920 else if (cbVar > sizeof(uint32_t))
921 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
922
923 /* Store the result. */
924 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
925
926 iemNativeVarRegisterRelease(pReNative, idxVarPc);
927 /** @todo implicitly free the variable? */
928
929 return off;
930}
931
932
933
934/*********************************************************************************************************************************
935* Emitters for changing PC/RIP/EIP/IP with a relative call jump (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters).     *
936*********************************************************************************************************************************/
937
938/** @todo These helpers belong to the stack push API naturally but we already need them up here (we could of course move
939 * this below the stack emitters but then this is not close to the rest of the PC/RIP handling...). */
940DECL_FORCE_INLINE_THROW(uint32_t)
941iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
942{
943 /* Use16BitSp: */
944#ifdef RT_ARCH_AMD64
945 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
946 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
947#else
948 /* sub regeff, regrsp, #cbMem */
949 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
950 /* and regeff, regeff, #0xffff */
951 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
952 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
953 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp into bits 15:0 of idxRegRsp. */
954 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
955#endif
956 return off;
957}
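
/**
 * Standalone sketch (plain C, not the emitted code) of the 16-bit stack pointer
 * behaviour implemented above: only SP, the low 16 bits of RSP, is decremented,
 * and the effective address is the zero-extended 16-bit result.
 * @code
 *  static void exampleStackPushUse16Sp(uint64_t *puRsp, uint64_t *puEffSp, uint8_t cbMem)
 *  {
 *      uint16_t const uNewSp = (uint16_t)(*puRsp - cbMem); // wraps within 16 bits
 *      *puRsp   = (*puRsp & ~(uint64_t)0xffff) | uNewSp;   // bits 63:16 are left untouched
 *      *puEffSp = uNewSp;                                  // zero-extended effective SP
 *  }
 * @endcode
 */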
958
959
960DECL_FORCE_INLINE(uint32_t)
961iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
962{
963 /* Use32BitSp: */
964 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
965 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
966 return off;
967}
968
969
970DECL_INLINE_THROW(uint32_t)
971iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
972 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
973{
974 /*
975 * Assert sanity.
976 */
977#ifdef VBOX_STRICT
978 if (RT_BYTE2(cBitsVarAndFlat) != 0)
979 {
980 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
981 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
982 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
983 Assert( pfnFunction
984 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
985 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
986 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
987 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
988 : UINT64_C(0xc000b000a0009000) ));
989 }
990 else
991 Assert( pfnFunction
992 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
993 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
994 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
995 : UINT64_C(0xc000b000a0009000) ));
996#endif
997
998#ifdef VBOX_STRICT
999 /*
1000 * Check that the fExec flags we've got make sense.
1001 */
1002 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1003#endif
1004
1005 /*
1006 * To keep things simple we have to commit any pending writes first as we
1007 * may end up making calls.
1008 */
1009 /** @todo we could postpone this till we make the call and reload the
1010 * registers after returning from the call. Not sure if that's sensible or
1011 * not, though. */
1012 off = iemNativeRegFlushPendingWrites(pReNative, off);
1013
1014 /*
1015 * First we calculate the new RSP and the effective stack pointer value.
1016 * For 64-bit mode and flat 32-bit these two are the same.
1017 * (Code structure is very similar to that of PUSH)
1018 */
1019 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
1020 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
1021 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
1022 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
1023 ? cbMem : sizeof(uint16_t);
1024 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
1025 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1026 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1027 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1028 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1029 if (cBitsFlat != 0)
1030 {
1031 Assert(idxRegEffSp == idxRegRsp);
1032 Assert(cBitsFlat == 32 || cBitsFlat == 64);
1033 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1034 if (cBitsFlat == 64)
1035 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1036 else
1037 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1038 }
1039 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1040 {
1041 Assert(idxRegEffSp != idxRegRsp);
1042 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1043 kIemNativeGstRegUse_ReadOnly);
1044#ifdef RT_ARCH_AMD64
1045 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1046#else
1047 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1048#endif
1049 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1050 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1051 offFixupJumpToUseOtherBitSp = off;
1052 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1053 {
1054 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1055 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1056 }
1057 else
1058 {
1059 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1060 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1061 }
1062 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1063 }
1064 /* SpUpdateEnd: */
1065 uint32_t const offLabelSpUpdateEnd = off;
1066
1067 /*
1068 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1069 * we're skipping lookup).
1070 */
1071 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1072 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
1073 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1074 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1075 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1076 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1077 : UINT32_MAX;
1078 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1079
1080
1081 if (!TlbState.fSkip)
1082 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1083 else
1084 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1085
1086 /*
1087 * Use16BitSp:
1088 */
1089 if (cBitsFlat == 0)
1090 {
1091#ifdef RT_ARCH_AMD64
1092 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1093#else
1094 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1095#endif
1096 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1097 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1098 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1099 else
1100 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1101 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1102 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1103 }
1104
1105 /*
1106 * TlbMiss:
1107 *
1108 * Call helper to do the pushing.
1109 */
1110 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1111
1112#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1113 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1114#else
1115 RT_NOREF(idxInstr);
1116#endif
1117
1118 /* Save variables in volatile registers. */
1119 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1120 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1121 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1122 | (RT_BIT_32(idxRegPc));
1123 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1124
1125 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1126 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1127 {
1128 /* Swap them using ARG0 as temp register: */
1129 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1130 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1131 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1132 }
1133 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1134 {
1135 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1136 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1137
1138 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1139 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1140 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1141 }
1142 else
1143 {
1144 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1145 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1146
1147 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1148 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1149 }
1150
1151 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1152 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1153
1154 /* Done setting up parameters, make the call. */
1155 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1156
1157 /* Restore variables and guest shadow registers to volatile registers. */
1158 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1159 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1160
1161#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1162 if (!TlbState.fSkip)
1163 {
1164 /* end of TlbMiss - Jump to the done label. */
1165 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1166 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1167
1168 /*
1169 * TlbLookup:
1170 */
1171 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
1172 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1173
1174 /*
1175 * Emit code to do the actual storing / fetching.
1176 */
1177 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1178# ifdef IEM_WITH_TLB_STATISTICS
1179 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1180 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1181# endif
1182 switch (cbMemAccess)
1183 {
1184 case 2:
1185 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1186 break;
1187 case 4:
1188 if (!fIsIntelSeg)
1189 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1190 else
1191 {
1192 /* Intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
1193 PUSH FS in real mode, so we have to try to emulate that here.
1194 We borrow the now unused idxReg1 from the TLB lookup code here. */
1195 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
1196 kIemNativeGstReg_EFlags);
1197 if (idxRegEfl != UINT8_MAX)
1198 {
1199#ifdef RT_ARCH_AMD64
1200 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
1201 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1202 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1203#else
1204 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
1205 off, TlbState.idxReg1, idxRegEfl,
1206 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1207#endif
1208 iemNativeRegFreeTmp(pReNative, idxRegEfl);
1209 }
1210 else
1211 {
1212 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
1213 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
1214 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1215 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1216 }
1217 /* ASSUMES the upper half of idxRegPc is ZERO. */
1218 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
1219 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
1220 }
1221 break;
1222 case 8:
1223 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1224 break;
1225 default:
1226 AssertFailed();
1227 }
1228
1229 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1230 TlbState.freeRegsAndReleaseVars(pReNative);
1231
1232 /*
1233 * TlbDone:
1234 *
1235 * Commit the new RSP value.
1236 */
1237 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1238 }
1239#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1240
1241#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1242 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1243#endif
1244 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1245 if (idxRegEffSp != idxRegRsp)
1246 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1247
1248 return off;
1249}
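/* Illustrative note (not part of the emitter and never compiled): the guest visible
   RSP update done by the push path above boils down to the following plain C, with a
   hypothetical function name and the TLB/exception handling left out. */
#if 0
static uint64_t iemNativeSketchPushRspUpdate(uint64_t uRsp, uint8_t cbMem, unsigned cBitsSp)
{
    if (cBitsSp == 16)   /* 16-bit SS: only SP (bits 15:0) changes, with 16-bit wrap-around. */
        return (uRsp & ~(uint64_t)UINT16_MAX) | (uint16_t)((uint16_t)uRsp - cbMem);
    if (cBitsSp == 32)   /* 32-bit SS: ESP changes; the upper half ends up zero as with any 32-bit GPR write. */
        return (uint32_t)uRsp - cbMem;
    return uRsp - cbMem; /* 64-bit / flat: plain RSP decrement. */
}
#endif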
1250
1251
1252/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1253#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1254 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1255
1256/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1257 * clears flags. */
1258#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1259 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1260 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1261
1262/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1263#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1264 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1265
1266/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1267 * clears flags. */
1268#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1269 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1270 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1271
1272#undef IEM_MC_IND_CALL_U16_AND_FINISH
1273
1274
1275/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1276#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1277 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1278
1279/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1280 * clears flags. */
1281#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1282 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1283 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1284
1285#undef IEM_MC_IND_CALL_U32_AND_FINISH
1286
1287
1288/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1289 * an extra parameter, for use in 64-bit code. */
1290#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1291 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1292
1293
1294/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1295 * an extra parameter, for use in 64-bit code and we need to check and clear
1296 * flags. */
1297#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1298 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1299 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1300
1301#undef IEM_MC_IND_CALL_U64_AND_FINISH
1302
1303/** Common worker for the IEM_MC_IND_CALL_U16/U32/U64_AND_FINISH threaded variants
1304 * above: pushes the return address and loads the new PC from a variable. */
1305DECL_INLINE_THROW(uint32_t)
1306iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1307 uint8_t idxInstr, uint8_t cbVar)
1308{
1309 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1310 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1311
1312 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1313 off = iemNativeRegFlushPendingWrites(pReNative, off);
1314
1315#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1316 Assert(pReNative->Core.offPc == 0);
1317
1318 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1319#endif
1320
1321 /* Get a register with the new PC loaded from idxVarPc.
1322 Note! This ASSUMES that the high bits of the GPR is zeroed. */
1323 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1324
1325 /* Check limit (may #GP(0) + exit TB). */
1326 if (!f64Bit)
1327/** @todo we can skip this test in FLAT 32-bit mode. */
1328 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1329 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1330 else if (cbVar > sizeof(uint32_t))
1331 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1332
1333#if 1
1334 /* Allocate a temporary PC register, we don't want it shadowed. */
1335 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1336 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1337#else
1338 /* Allocate a temporary PC register. */
1339 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1340 true /*fNoVolatileRegs*/);
1341#endif
1342
1343 /* Perform the addition and push the variable to the guest stack. */
1344 /** @todo Flat variants for PC32 variants. */
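    /* Note: the RT_MAKE_U32_FROM_U8 argument below appears to pack the operand size in
       bits into byte 0 and the flat stack-pointer width (0, 32 or 64) into byte 1; this
       reading is inferred from the call sites here, not from iemNativeEmitStackPushRip. */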
1345 switch (cbVar)
1346 {
1347 case sizeof(uint16_t):
1348 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1349 /* Truncate the result to 16-bit IP. */
1350 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1351 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1352 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1353 break;
1354 case sizeof(uint32_t):
1355 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1356 /** @todo In FLAT mode we can use the flat variant. */
1357 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1358 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1359 break;
1360 case sizeof(uint64_t):
1361 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1362 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1363 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1364 break;
1365 default:
1366 AssertFailed();
1367 }
1368
1369 /* RSP got changed, so do this again. */
1370 off = iemNativeRegFlushPendingWrites(pReNative, off);
1371
1372 /* Store the result. */
1373 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1374
1375#if 1
1376 /* Need to transfer the shadow information to the new RIP register. */
1377 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1378#else
1379 /* Sync the new PC. */
1380 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1381#endif
1382 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1383 iemNativeRegFreeTmp(pReNative, idxPcReg);
1384 /** @todo implicitly free the variable? */
1385
1386 return off;
1387}
1388
1389
1390/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1391 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1392#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1393 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1394
1395/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1396 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1397 * flags. */
1398#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1399 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1400 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1401
1402/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1403 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1404#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1405 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1406
1407/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1408 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1409 * flags. */
1410#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1411 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1412 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1413
1414/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1415 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1416#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
1417 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1418
1419/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1420 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1421 * flags. */
1422#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
1423 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
1424 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1425
1426#undef IEM_MC_REL_CALL_S16_AND_FINISH
1427
1428/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1429 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1430DECL_INLINE_THROW(uint32_t)
1431iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
1432 uint8_t idxInstr)
1433{
1434 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1435 off = iemNativeRegFlushPendingWrites(pReNative, off);
1436
1437#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1438 Assert(pReNative->Core.offPc == 0);
1439
1440 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1441#endif
1442
1443 /* Allocate a temporary PC register. */
1444 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1445 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1446 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1447
1448 /* Calculate the new RIP. */
1449 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1450 /* Truncate the result to 16-bit IP. */
1451 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
1452 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1453 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1454
1455 /* Truncate the result to 16-bit IP. */
1456 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
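    /* Example of the 16-bit wrap-around above: a near CALL at IP=0xFFFE with cbInstr=3 and
       offDisp=+0x10 yields a return address of 0x0001 and a new IP of 0x0011, both truncated
       to 16 bits before the CS limit check below. */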
1457
1458 /* Check limit (may #GP(0) + exit TB). */
1459 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1460
1461 /* Perform the addition and push the variable to the guest stack. */
1462 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1463 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1464
1465 /* RSP got changed, so flush again. */
1466 off = iemNativeRegFlushPendingWrites(pReNative, off);
1467
1468 /* Store the result. */
1469 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1470
1471 /* Need to transfer the shadow information to the new RIP register. */
1472 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1473 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1474 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1475
1476 return off;
1477}
1478
1479
1480/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1481 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1482#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
1483 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
1484
1485/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1486 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1487 * flags. */
1488#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
1489 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
1490 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1491
1492#undef IEM_MC_REL_CALL_S32_AND_FINISH
1493
1494/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1495 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1496DECL_INLINE_THROW(uint32_t)
1497iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
1498 uint8_t idxInstr)
1499{
1500 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1501 off = iemNativeRegFlushPendingWrites(pReNative, off);
1502
1503#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1504 Assert(pReNative->Core.offPc == 0);
1505
1506 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1507#endif
1508
1509 /* Allocate a temporary PC register. */
1510 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1511 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1512 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1513
1514 /* Update the EIP to get the return address. */
1515 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1516
1517 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1518 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1519 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1520 /** @todo we can skip this test in FLAT 32-bit mode. */
1521 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1522
1523 /* Push the return address to the guest stack. */
1524 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
1525 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1526 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1527
1528 /* RSP got changed, so do this again. */
1529 off = iemNativeRegFlushPendingWrites(pReNative, off);
1530
1531 /* Store the result. */
1532 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1533
1534 /* Need to transfer the shadow information to the new RIP register. */
1535 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1536 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1537 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1538
1539 return off;
1540}
1541
1542
1543/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1544 * an extra parameter, for use in 64-bit code. */
1545#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
1546 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
1547
1548/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1549 * an extra parameter, for use in 64-bit code and we need to check and clear
1550 * flags. */
1551#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
1552 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
1553 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1554
1555#undef IEM_MC_REL_CALL_S64_AND_FINISH
1556
1557/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1558 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1559DECL_INLINE_THROW(uint32_t)
1560iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
1561 uint8_t idxInstr)
1562{
1563 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1564 off = iemNativeRegFlushPendingWrites(pReNative, off);
1565
1566#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1567 Assert(pReNative->Core.offPc == 0);
1568
1569 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1570#endif
1571
1572 /* Allocate a temporary PC register. */
1573 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1574 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1575 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1576
1577 /* Update the RIP to get the return address. */
1578 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
1579
1580 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1581 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
1582 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
1583 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1584
1585 /* Push the return address to the guest stack. */
1586 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1587 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1588
1589 /* RSP got changed, so do this again. */
1590 off = iemNativeRegFlushPendingWrites(pReNative, off);
1591
1592 /* Store the result. */
1593 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1594
1595 /* Need to transfer the shadow information to the new RIP register. */
1596 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1597 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1598 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1599
1600 return off;
1601}
1602
1603
1604/*********************************************************************************************************************************
1605* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters). *
1606*********************************************************************************************************************************/
1607
1608DECL_FORCE_INLINE_THROW(uint32_t)
1609iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1610 uint16_t cbPopAdd, uint8_t idxRegTmp)
1611{
1612 /* Use16BitSp: */
1613#ifdef RT_ARCH_AMD64
1614 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1615 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1616 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
1617 RT_NOREF(idxRegTmp);
1618#elif defined(RT_ARCH_ARM64)
1619 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
1620 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
1621 /* add tmp, regrsp, #(cbMem + cbPopAdd) */
1622 uint16_t const cbCombined = cbMem + cbPopAdd;
1623 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
1624 if (cbCombined >= RT_BIT_32(12))
1625 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
1626 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
1627 /* and tmp, tmp, #0xffff */
1628 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1629 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
1630 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
1631 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
1632#else
1633# error "Port me"
1634#endif
1635 return off;
1636}
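/* Illustrative note (not part of the emitter and never compiled): the SP update emitted by
   the 16-bit helper above corresponds to the following plain C, with a hypothetical function
   name; cbMem is the return address size and cbPopAdd the RETN immediate. */
#if 0
static uint64_t iemNativeSketchRetnSpUpdate16(uint64_t uRsp, uint8_t cbMem, uint16_t cbPopAdd, uint16_t *puEffSp)
{
    *puEffSp = (uint16_t)uRsp;                                   /* address the return address is read from */
    uint16_t const uNewSp = (uint16_t)(uRsp + cbMem + cbPopAdd); /* 16-bit wrap-around addition */
    return (uRsp & ~(uint64_t)UINT16_MAX) | uNewSp;              /* only SP (bits 15:0) is updated */
}
#endif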
1637
1638
1639DECL_FORCE_INLINE_THROW(uint32_t)
1640iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1641 uint16_t cbPopAdd)
1642{
1643 /* Use32BitSp: */
1644 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1645 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
1646 return off;
1647}
1648
1649
1650/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
1651#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
1652 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
1653
1654/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
1655#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1656 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1657
1658/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
1659#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1660 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1661
1662/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
1663 * clears flags. */
1664#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
1665 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
1666 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1667
1668/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
1669 * clears flags. */
1670#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1671 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1672 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1673
1674/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
1675 * clears flags. */
1676#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1677 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1678 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1679
1680/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
1681DECL_INLINE_THROW(uint32_t)
1682iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
1683 IEMMODE enmEffOpSize, uint8_t idxInstr)
1684{
1685 RT_NOREF(cbInstr);
1686
1687#ifdef VBOX_STRICT
1688 /*
1689 * Check that the fExec flags we've got make sense.
1690 */
1691 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1692#endif
1693
1694 /*
1695 * To keep things simple we have to commit any pending writes first as we
1696 * may end up making calls.
1697 */
1698 off = iemNativeRegFlushPendingWrites(pReNative, off);
1699
1700 /*
1701 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
1702 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
1703 * directly as the effective stack pointer.
1704 * (Code structure is very similar to that of PUSH)
1705 *
1706 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
1707 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
1708 * aren't commonly used (or useful) and thus not in need of optimizing.
1709 *
1710 * Note! For non-flat modes the guest RSP is not allocated for update but rather for calculation,
1711 * as the shadowed register would otherwise remain modified even if the return address raises
1712 * a \#GP(0) for being outside the CS limit, leaving a wrong stack pointer value in the guest (see
1713 * the near return testcase in bs3-cpu-basic-2). If no exception is raised, the shadowing is transferred
1714 * to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
1715 */
1716 uint8_t const cbMem = enmEffOpSize == IEMMODE_64BIT
1717 ? sizeof(uint64_t)
1718 : enmEffOpSize == IEMMODE_32BIT
1719 ? sizeof(uint32_t)
1720 : sizeof(uint16_t);
1721 bool const fFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
1722 uintptr_t const pfnFunction = fFlat
1723 ? enmEffOpSize == IEMMODE_64BIT
1724 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
1725 : (uintptr_t)iemNativeHlpStackFlatFetchU32
1726 : enmEffOpSize == IEMMODE_32BIT
1727 ? (uintptr_t)iemNativeHlpStackFetchU32
1728 : (uintptr_t)iemNativeHlpStackFetchU16;
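    /* Example: an o16 RETN (0x66 prefixed) in FLAT 32-bit code thus ends up with cbMem = 2,
       fFlat = false and the iemNativeHlpStackFetchU16 helper, as per the note above. */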
1729 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1730 fFlat ? kIemNativeGstRegUse_ForUpdate
1731 : kIemNativeGstRegUse_Calculation,
1732 true /*fNoVolatileRegs*/);
1733 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1734 /** @todo can do a better job picking the register here. For cbMem >= 4 this
1735 * will be the resulting register value. */
1736 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
1737
1738 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1739 if (fFlat)
1740 Assert(idxRegEffSp == idxRegRsp);
1741 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1742 {
1743 Assert(idxRegEffSp != idxRegRsp);
1744 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1745 kIemNativeGstRegUse_ReadOnly);
1746#ifdef RT_ARCH_AMD64
1747 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1748#else
1749 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1750#endif
1751 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1752 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1753 offFixupJumpToUseOtherBitSp = off;
1754 if (enmEffOpSize == IEMMODE_32BIT)
1755 {
1756 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1757 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1758 }
1759 else
1760 {
1761 Assert(enmEffOpSize == IEMMODE_16BIT);
1762 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1763 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1764 idxRegMemResult);
1765 }
1766 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1767 }
1768 /* SpUpdateEnd: */
1769 uint32_t const offLabelSpUpdateEnd = off;
1770
1771 /*
1772 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1773 * we're skipping lookup).
1774 */
1775 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
1776 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
1777 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1778 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1779 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1780 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1781 : UINT32_MAX;
1782
1783 if (!TlbState.fSkip)
1784 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1785 else
1786 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1787
1788 /*
1789 * Use16BitSp:
1790 */
1791 if (!fFlat)
1792 {
1793#ifdef RT_ARCH_AMD64
1794 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1795#else
1796 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1797#endif
1798 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1799 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1800 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1801 idxRegMemResult);
1802 else
1803 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1804 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1805 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1806 }
1807
1808 /*
1809 * TlbMiss:
1810 *
1811 * Call helper to do the popping.
1812 */
1813 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1814
1815#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1816 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1817#else
1818 RT_NOREF(idxInstr);
1819#endif
1820
1821 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1822 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1823 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
1824 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1825
1826
1827 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
1828 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1829 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1830
1831 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1832 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1833
1834 /* Done setting up parameters, make the call. */
1835 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1836
1837 /* Move the return register content to idxRegMemResult. */
1838 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
1839 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
1840
1841 /* Restore variables and guest shadow registers to volatile registers. */
1842 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1843 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1844
1845#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1846 if (!TlbState.fSkip)
1847 {
1848 /* end of TlbMiss - Jump to the done label. */
1849 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1850 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1851
1852 /*
1853 * TlbLookup:
1854 */
1855 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
1856 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1857
1858 /*
1859 * Emit code to load the value (the address is in idxRegMemResult; the loaded value replaces it).
1860 */
1861 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1862# ifdef IEM_WITH_TLB_STATISTICS
1863 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1864 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1865# endif
1866 switch (cbMem)
1867 {
1868 case 2:
1869 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1870 break;
1871 case 4:
1872 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1873 break;
1874 case 8:
1875 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1876 break;
1877 default:
1878 AssertFailed();
1879 }
1880
1881 TlbState.freeRegsAndReleaseVars(pReNative);
1882
1883 /*
1884 * TlbDone:
1885 *
1886 * Set the new RSP value (FLAT accesses need to calculate it first) and
1887 * commit the popped register value.
1888 */
1889 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1890 }
1891#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1892
1893 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
1894 if (!f64Bit)
1895/** @todo we can skip this test in FLAT 32-bit mode. */
1896 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1897 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1898 else if (enmEffOpSize == IEMMODE_64BIT)
1899 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1900
1901 /* Complete RSP calculation for FLAT mode. */
1902 if (idxRegEffSp == idxRegRsp)
1903 {
1904 if (enmEffOpSize == IEMMODE_64BIT)
1905 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
1906 else
1907 {
1908 Assert(enmEffOpSize == IEMMODE_32BIT);
1909 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
1910 }
1911 }
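    /* E.g. for a flat 64-bit 'ret 8' the code above adds sizeof(uint64_t) + 8 = 16 to RSP. */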
1912
1913 /* Commit the result and clear any current guest shadows for RIP. */
1914 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
1915 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1916 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
1917
1918 /* Need to transfer the shadowing information to the host register containing the updated value now. */
1919 if (!fFlat)
1920 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
1921
1922 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1923 if (idxRegEffSp != idxRegRsp)
1924 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1925 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1926 return off;
1927}
1928
1929
1930/*********************************************************************************************************************************
1931* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
1932*********************************************************************************************************************************/
1933
1934#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
1935 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
1936
1937/**
1938 * Emits code to check if a \#NM exception should be raised.
1939 *
1940 * @returns New code buffer offset, UINT32_MAX on failure.
1941 * @param pReNative The native recompile state.
1942 * @param off The code buffer offset.
1943 * @param idxInstr The current instruction.
1944 */
1945DECL_INLINE_THROW(uint32_t)
1946iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1947{
1948#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1949 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
1950
1951 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
1952 {
1953#endif
1954 /*
1955 * Make sure we don't have any outstanding guest register writes as we may
1956 * raise an #NM and all guest registers must be up to date in CPUMCTX.
1957 */
1958 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
1959 off = iemNativeRegFlushPendingWrites(pReNative, off);
1960
1961#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1962 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1963#else
1964 RT_NOREF(idxInstr);
1965#endif
1966
1967 /* Allocate a temporary CR0 register. */
1968 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
1969 kIemNativeGstRegUse_ReadOnly);
1970
1971 /*
1972 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
1973 * return raisexcpt();
1974 */
1975 /* Test and jump. */
1976 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS,
1977 kIemNativeLabelType_RaiseNm);
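    /* I.e. CR0.EM=1 or CR0.TS=1 makes the TB exit here via the RaiseNm (#NM) path. */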
1978
1979 /* Free but don't flush the CR0 register. */
1980 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1981
1982#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1983 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
1984 }
1985 else
1986 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
1987#endif
1988
1989 return off;
1990}
1991
1992
1993#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
1994 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
1995
1996/**
1997 * Emits code to check if a \#NM exception should be raised.
1998 *
1999 * @returns New code buffer offset, UINT32_MAX on failure.
2000 * @param pReNative The native recompile state.
2001 * @param off The code buffer offset.
2002 * @param idxInstr The current instruction.
2003 */
2004DECL_INLINE_THROW(uint32_t)
2005iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2006{
2007#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2008 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2009
2010 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2011 {
2012#endif
2013 /*
2014 * Make sure we don't have any outstanding guest register writes as we may
2015 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2016 */
2017 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2018 off = iemNativeRegFlushPendingWrites(pReNative, off);
2019
2020#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2021 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2022#else
2023 RT_NOREF(idxInstr);
2024#endif
2025
2026 /* Allocate a temporary CR0 register. */
2027 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2028 kIemNativeGstRegUse_Calculation);
2029
2030 /*
2031 * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
2032 * return raisexcpt();
2033 */
2034 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2035 /* Test and jump. */
2036 off = iemNativeEmitTestIfGpr32EqualsImmAndTbExit(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS,
2037 kIemNativeLabelType_RaiseNm);
2038
2039 /* Free the CR0 register. */
2040 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2041
2042#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2043 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2044 }
2045 else
2046 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2047#endif
2048
2049 return off;
2050}
2051
2052
2053#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2054 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2055
2056/**
2057 * Emits code to check if a \#MF exception should be raised.
2058 *
2059 * @returns New code buffer offset, UINT32_MAX on failure.
2060 * @param pReNative The native recompile state.
2061 * @param off The code buffer offset.
2062 * @param idxInstr The current instruction.
2063 */
2064DECL_INLINE_THROW(uint32_t)
2065iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2066{
2067 /*
2068 * Make sure we don't have any outstanding guest register writes as we may
2069 * raise an #MF and all guest registers must be up to date in CPUMCTX.
2070 */
2071 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2072 off = iemNativeRegFlushPendingWrites(pReNative, off);
2073
2074#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2075 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2076#else
2077 RT_NOREF(idxInstr);
2078#endif
2079
2080 /* Allocate a temporary FSW register. */
2081 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2082 kIemNativeGstRegUse_ReadOnly);
2083
2084 /*
2085 * if ((FSW & X86_FSW_ES) != 0)
2086 * return raisexcpt();
2087 */
2088 /* Test and jump. */
2089 off = iemNativeEmitTestBitInGprAndTbExitIfSet(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT, kIemNativeLabelType_RaiseMf);
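    /* I.e. any pending x87 exception summarized in FSW.ES makes the TB exit here via the RaiseMf (#MF) path. */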
2090
2091 /* Free but don't flush the FSW register. */
2092 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2093
2094 return off;
2095}
2096
2097
2098#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2099 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2100
2101/**
2102 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2103 *
2104 * @returns New code buffer offset, UINT32_MAX on failure.
2105 * @param pReNative The native recompile state.
2106 * @param off The code buffer offset.
2107 * @param idxInstr The current instruction.
2108 */
2109DECL_INLINE_THROW(uint32_t)
2110iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2111{
2112#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2113 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2114
2115 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2116 {
2117#endif
2118 /*
2119 * Make sure we don't have any outstanding guest register writes as we may
2120 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2121 */
2122 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2123 off = iemNativeRegFlushPendingWrites(pReNative, off);
2124
2125#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2126 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2127#else
2128 RT_NOREF(idxInstr);
2129#endif
2130
2131 /* Allocate a temporary CR0 and CR4 register. */
2132 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2133 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2134 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2135
2136 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2137#ifdef RT_ARCH_AMD64
2138 /*
2139 * We do a modified test here:
2140 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
2141 * else { goto RaiseSseRelated; }
2142 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2143 * all targets except the 386, which doesn't support SSE, so this
2144 * should be a safe assumption.
2145 */
2146 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
2147 //pCodeBuf[off++] = 0xcc;
2148 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2149 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2150 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2151 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2152 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2153 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseSseRelated, kIemNativeInstrCond_ne);
2154
2155#elif defined(RT_ARCH_ARM64)
2156 /*
2157 * We do a modified test here:
2158 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2159 * else { goto RaiseSseRelated; }
2160 */
2161 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
2162 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2163 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2164 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2165 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2166 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2167 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2168 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2169 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2170 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2171 kIemNativeLabelType_RaiseSseRelated);
2172
2173#else
2174# error "Port me!"
2175#endif
2176
2177 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2178 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2179 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2180 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2181
2182#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2183 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2184 }
2185 else
2186 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2187#endif
2188
2189 return off;
2190}
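/* Illustrative sketch (not part of the recompiler and never compiled): the plain C condition
   the native code emitted above implements, using the standard VBox CR0/CR4 bit defines; the
   function name is hypothetical. */
#if 0
static bool iemNativeSketchWouldRaiseSseRelated(uint32_t uCr0, uint32_t uCr4)
{
    /* Raise #UD/#NM unless CR4.OSFXSR is set and both CR0.EM and CR0.TS are clear. */
    return (((uCr4 & X86_CR4_OSFXSR) | (uCr0 & (X86_CR0_EM | X86_CR0_TS))) ^ X86_CR4_OSFXSR) != 0;
}
#endif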
2191
2192
2193#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2194 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2195
2196/**
2197 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2198 *
2199 * @returns New code buffer offset, UINT32_MAX on failure.
2200 * @param pReNative The native recompile state.
2201 * @param off The code buffer offset.
2202 * @param idxInstr The current instruction.
2203 */
2204DECL_INLINE_THROW(uint32_t)
2205iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2206{
2207#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2208 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2209
2210 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2211 {
2212#endif
2213 /*
2214 * Make sure we don't have any outstanding guest register writes as we may
2215 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2216 */
2217 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2218 off = iemNativeRegFlushPendingWrites(pReNative, off);
2219
2220#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2221 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2222#else
2223 RT_NOREF(idxInstr);
2224#endif
2225
2226 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2227 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2228 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2229 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2230 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2231
2232 /*
2233 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2234 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2235 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2236 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2237 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2238 * { likely }
2239 * else { goto RaiseAvxRelated; }
2240 */
2241#ifdef RT_ARCH_AMD64
2242 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2243 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
2244 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2245 ^ 0x1a) ) { likely }
2246 else { goto RaiseAvxRelated; } */
2247 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
2248 //pCodeBuf[off++] = 0xcc;
2249 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2250 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2251 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2252 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2253 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2254 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2255 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2256 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2257 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2258 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2259 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseAvxRelated, kIemNativeInstrCond_ne);
2260
2261#elif defined(RT_ARCH_ARM64)
2262 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
2263 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2264 else { goto RaiseAvxRelated; } */
2265 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
2266 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2267 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2268 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2269 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2270 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2271 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2272 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2273 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2274 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2275 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2276 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2277 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2278 kIemNativeLabelType_RaiseAvxRelated);
2279
2280#else
2281# error "Port me!"
2282#endif
2283
2284 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2285 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2286 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2287 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2288#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2289 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2290 }
2291 else
2292 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2293#endif
2294
2295 return off;
2296}
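/* Illustrative sketch (not part of the recompiler and never compiled): the plain C condition
   the native code emitted above implements; the function name is hypothetical. */
#if 0
static bool iemNativeSketchWouldRaiseAvxRelated(uint32_t uCr0, uint32_t uCr4, uint64_t uXcr0)
{
    /* Raise #UD/#NM unless XCR0 enables both SSE and YMM state, CR4.OSXSAVE is set and CR0.TS is clear. */
    return    (uXcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
           || !(uCr4 & X86_CR4_OSXSAVE)
           || (uCr0 & X86_CR0_TS) != 0;
}
#endif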
2297
2298
2299#define IEM_MC_RAISE_DIVIDE_ERROR() \
2300 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
2301
2302/**
2303 * Emits code to raise a \#DE.
2304 *
2305 * @returns New code buffer offset, UINT32_MAX on failure.
2306 * @param pReNative The native recompile state.
2307 * @param off The code buffer offset.
2308 * @param idxInstr The current instruction.
2309 */
2310DECL_INLINE_THROW(uint32_t)
2311iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2312{
2313 /*
2314 * Make sure we don't have any outstanding guest register writes as we may raise a \#DE and all guest registers must be up to date in CPUMCTX.
2315 */
2316 off = iemNativeRegFlushPendingWrites(pReNative, off);
2317
2318#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2319 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2320#else
2321 RT_NOREF(idxInstr);
2322#endif
2323
2324 /* raise \#DE exception unconditionally. */
2325 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseDe);
2326}
2327
2328
2329#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2330 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2331
2332/**
2333 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2334 *
2335 * @returns New code buffer offset, UINT32_MAX on failure.
2336 * @param pReNative The native recompile state.
2337 * @param off The code buffer offset.
2338 * @param idxInstr The current instruction.
2339 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2340 * @param cbAlign The alignment in bytes to check against.
2341 */
2342DECL_INLINE_THROW(uint32_t)
2343iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2344 uint8_t idxVarEffAddr, uint8_t cbAlign)
2345{
2346 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2347 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2348
2349 /*
2350 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2351 */
2352 off = iemNativeRegFlushPendingWrites(pReNative, off);
2353
2354#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2355 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2356#else
2357 RT_NOREF(idxInstr);
2358#endif
2359
2360 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2361
2362 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxVarReg, cbAlign - 1,
2363 kIemNativeLabelType_RaiseGp0);
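    /* I.e. this raises #GP(0) whenever (EffAddr & (cbAlign - 1)) != 0; the mask test assumes cbAlign is a power of two. */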
2364
2365 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2366 return off;
2367}
2368
2369
2370/*********************************************************************************************************************************
2371* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2372*********************************************************************************************************************************/
2373
2374/**
2375 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2376 *
2377 * @returns Pointer to the condition stack entry on success, NULL on failure
2378 * (too many nestings)
2379 */
2380DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
2381{
2382#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2383 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
2384#endif
2385
2386 uint32_t const idxStack = pReNative->cCondDepth;
2387 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
2388
2389 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
2390 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
2391
2392 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
2393 pEntry->fInElse = false;
2394 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
2395 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
2396
2397 return pEntry;
2398}
2399
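/*
 * Illustrative expansion (simplified sketch): the condition stack ties each
 * IEM_MC_IF_XXX to the IEM_MC_ELSE / IEM_MC_ENDIF macros defined further down,
 * so inside a recompiled MC block the three roughly expand to:
 *
 *      off = iemNativeEmitIfEflagsBitSet(pReNative, off, fBit); do {
 *          ... if-block statements ...
 *      } while (0); off = iemNativeEmitElse(pReNative, off); do {
 *          ... else-block statements ...
 *      } while (0); off = iemNativeEmitEndIf(pReNative, off);
 *
 * Each IEM_MC_IF_XXX pushes one aCondStack entry via iemNativeCondPushIf and
 * the matching IEM_MC_ENDIF pops it again in iemNativeEmitEndIf.
 */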
2400
2401/**
2402 * Start of the if-block, snapshotting the register and variable state.
2403 */
2404DECL_INLINE_THROW(void)
2405iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
2406{
2407 Assert(offIfBlock != UINT32_MAX);
2408 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2409 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2410 Assert(!pEntry->fInElse);
2411
2412 /* Define the start of the IF block if requested or for disassembly purposes. */
2413 if (idxLabelIf != UINT32_MAX)
2414 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
2415#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2416 else
2417 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
2418#else
2419 RT_NOREF(offIfBlock);
2420#endif
2421
2422#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2423 Assert(pReNative->Core.offPc == 0);
2424#endif
2425
2426 /* Copy the initial state so we can restore it in the 'else' block. */
2427 pEntry->InitialState = pReNative->Core;
2428}
2429
2430
2431#define IEM_MC_ELSE() } while (0); \
2432 off = iemNativeEmitElse(pReNative, off); \
2433 do {
2434
2435/** Emits code related to IEM_MC_ELSE. */
2436DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2437{
2438 /* Check sanity and get the conditional stack entry. */
2439 Assert(off != UINT32_MAX);
2440 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2441 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2442 Assert(!pEntry->fInElse);
2443
2444#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2445 /* Writeback any dirty shadow registers. */
2446 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2447 * in one of the branches and leave guest registers already dirty before the start of the if
2448 * block alone. */
2449 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2450#endif
2451
2452 /* Jump to the endif */
2453 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
2454
2455 /* Define the else label and enter the else part of the condition. */
2456 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2457 pEntry->fInElse = true;
2458
2459#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2460 Assert(pReNative->Core.offPc == 0);
2461#endif
2462
2463 /* Snapshot the core state so we can do a merge at the endif and restore
2464 the snapshot we took at the start of the if-block. */
2465 pEntry->IfFinalState = pReNative->Core;
2466 pReNative->Core = pEntry->InitialState;
2467
2468 return off;
2469}
2470
2471
2472#define IEM_MC_ENDIF() } while (0); \
2473 off = iemNativeEmitEndIf(pReNative, off)
2474
2475/** Emits code related to IEM_MC_ENDIF. */
2476DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2477{
2478 /* Check sanity and get the conditional stack entry. */
2479 Assert(off != UINT32_MAX);
2480 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2481 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2482
2483#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2484 Assert(pReNative->Core.offPc == 0);
2485#endif
2486#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2487 /* Writeback any dirty shadow registers (else branch). */
2488 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2489 * in one of the branches and leave guest registers already dirty before the start of the if
2490 * block alone. */
2491 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2492#endif
2493
2494 /*
2495 * Now we have to find common ground with the core state at the end of the
2496 * other branch. Use the smallest common denominator and just drop anything
2497 * that isn't the same in both states.
2498 */
2499 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
2500 * which is why we're doing this at the end of the else-block.
2501 * But we'd need more info about the future for that to be worth the effort. */
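    /* pOther: the core state at the end of the if-block when an else-block exists,
       otherwise the state snapshotted just before the if-block started. */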
2502 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
2503#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2504 Assert( pOther->bmGstRegShadowDirty == 0
2505 && pReNative->Core.bmGstRegShadowDirty == 0);
2506#endif
2507
2508 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
2509 {
2510 /* shadow guest stuff first. */
2511 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
2512 if (fGstRegs)
2513 {
2514 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
2515 do
2516 {
2517 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2518 fGstRegs &= ~RT_BIT_64(idxGstReg);
2519
2520 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
2521 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
2522 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
2523 {
2524 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
2525 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
2526
2527#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2528 /* Writeback any dirty shadow registers we are about to unshadow. */
2529 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
2530#endif
2531 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
2532 }
2533 } while (fGstRegs);
2534 }
2535 else
2536 {
2537 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
2538#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2539 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
2540#endif
2541 }
2542
2543 /* Check variables next. For now we must require them to be identical
2544 or stuff we can recreate. */
2545 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
2546 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
2547 if (fVars)
2548 {
2549 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
2550 do
2551 {
2552 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
2553 fVars &= ~RT_BIT_32(idxVar);
2554
2555 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
2556 {
2557 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
2558 continue;
2559 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2560 {
2561 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2562 if (idxHstReg != UINT8_MAX)
2563 {
2564 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2565 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2566 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
2567 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2568 }
2569 continue;
2570 }
2571 }
2572 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
2573 continue;
2574
2575 /* Irreconcilable, so drop it. */
2576 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2577 if (idxHstReg != UINT8_MAX)
2578 {
2579 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2580 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2581 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
2582 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2583 }
2584 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2585 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
2586 } while (fVars);
2587 }
2588
2589 /* Finally, check that the host register allocations matches. */
2590 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
2591 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
2592 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
2593 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
2594 }
2595
2596 /*
2597 * Define the endif label and maybe the else one if we're still in the 'if' part.
2598 */
2599 if (!pEntry->fInElse)
2600 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2601 else
2602 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
2603 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
2604
2605 /* Pop the conditional stack. */
2606 pReNative->cCondDepth -= 1;
2607
2608 return off;
2609}
2610
2611
2612#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
2613 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
2614 do {
2615
2616/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
2617DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2618{
2619 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2620 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2621
2622 /* Get the eflags. */
2623 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2624 kIemNativeGstRegUse_ReadOnly);
2625
2626 /* Test and jump. */
2627 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2628
2629 /* Free but don't flush the EFlags register. */
2630 iemNativeRegFreeTmp(pReNative, idxEflReg);
2631
2632 /* Make a copy of the core state now as we start the if-block. */
2633 iemNativeCondStartIfBlock(pReNative, off);
2634
2635 return off;
2636}
2637
2638
2639#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
2640 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
2641 do {
2642
2643/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
2644DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2645{
2646 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2647 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2648
2649 /* Get the eflags. */
2650 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2651 kIemNativeGstRegUse_ReadOnly);
2652
2653 /* Test and jump. */
2654 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2655
2656 /* Free but don't flush the EFlags register. */
2657 iemNativeRegFreeTmp(pReNative, idxEflReg);
2658
2659 /* Make a copy of the core state now as we start the if-block. */
2660 iemNativeCondStartIfBlock(pReNative, off);
2661
2662 return off;
2663}
2664
2665
2666#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
2667 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
2668 do {
2669
2670/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
2671DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2672{
2673 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2674 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2675
2676 /* Get the eflags. */
2677 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2678 kIemNativeGstRegUse_ReadOnly);
2679
2680 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2681 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2682
2683 /* Test and jump. */
2684 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2685
2686 /* Free but don't flush the EFlags register. */
2687 iemNativeRegFreeTmp(pReNative, idxEflReg);
2688
2689 /* Make a copy of the core state now as we start the if-block. */
2690 iemNativeCondStartIfBlock(pReNative, off);
2691
2692 return off;
2693}
2694
2695
2696#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
2697 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
2698 do {
2699
2700/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
2701DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2702{
2703 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2704 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2705
2706 /* Get the eflags. */
2707 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2708 kIemNativeGstRegUse_ReadOnly);
2709
2710 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2711 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2712
2713 /* Test and jump. */
2714 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2715
2716 /* Free but don't flush the EFlags register. */
2717 iemNativeRegFreeTmp(pReNative, idxEflReg);
2718
2719 /* Make a copy of the core state now as we start the if-block. */
2720 iemNativeCondStartIfBlock(pReNative, off);
2721
2722 return off;
2723}
2724
2725
2726#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
2727 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
2728 do {
2729
2730#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
2731 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
2732 do {
2733
2734/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
2735DECL_INLINE_THROW(uint32_t)
2736iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2737 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2738{
2739 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
2740 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2741
2742 /* Get the eflags. */
2743 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2744 kIemNativeGstRegUse_ReadOnly);
2745
2746 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2747 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2748
2749 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2750 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2751 Assert(iBitNo1 != iBitNo2);
2752
2753#ifdef RT_ARCH_AMD64
2754 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
2755
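    /* Isolate fBit1InEfl, shift it into fBit2InEfl's position and XOR it with EFLAGS;
       bit iBitNo2 of the result is then set exactly when the two flag bits differ. */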
2756 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2757 if (iBitNo1 > iBitNo2)
2758 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2759 else
2760 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2761 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2762
2763#elif defined(RT_ARCH_ARM64)
2764 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2765 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2766
2767 /* and tmpreg, eflreg, #1<<iBitNo1 */
2768 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2769
2770 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2771 if (iBitNo1 > iBitNo2)
2772 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2773 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2774 else
2775 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2776 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2777
2778 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2779
2780#else
2781# error "Port me"
2782#endif
2783
2784 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
2785 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2786 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2787
2788 /* Free but don't flush the EFlags and tmp registers. */
2789 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2790 iemNativeRegFreeTmp(pReNative, idxEflReg);
2791
2792 /* Make a copy of the core state now as we start the if-block. */
2793 iemNativeCondStartIfBlock(pReNative, off);
2794
2795 return off;
2796}
2797
2798
2799#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
2800 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
2801 do {
2802
2803#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
2804 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
2805 do {
2806
2807/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
2808 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
2809DECL_INLINE_THROW(uint32_t)
2810iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
2811 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2812{
2813 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
2814 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2815
2816 /* We need an if-block label for the non-inverted variant. */
2817 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
2818 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
2819
2820 /* Get the eflags. */
2821 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2822 kIemNativeGstRegUse_ReadOnly);
2823
2824 /* Translate the flag masks to bit numbers. */
2825 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2826 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2827
2828 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2829 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2830 Assert(iBitNo1 != iBitNo);
2831
2832 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2833 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2834 Assert(iBitNo2 != iBitNo);
2835 Assert(iBitNo2 != iBitNo1);
2836
2837#ifdef RT_ARCH_AMD64
2838 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
2839#elif defined(RT_ARCH_ARM64)
2840 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2841#endif
2842
2843 /* Check for the lone bit first. */
2844 if (!fInverted)
2845 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2846 else
2847 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
2848
2849 /* Then extract and compare the other two bits. */
2850#ifdef RT_ARCH_AMD64
2851 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2852 if (iBitNo1 > iBitNo2)
2853 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2854 else
2855 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2856 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2857
2858#elif defined(RT_ARCH_ARM64)
2859 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2860
2861 /* and tmpreg, eflreg, #1<<iBitNo1 */
2862 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2863
2864 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2865 if (iBitNo1 > iBitNo2)
2866 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2867 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2868 else
2869 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2870 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2871
2872 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2873
2874#else
2875# error "Port me"
2876#endif
2877
2878 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
2879 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2880 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2881
2882 /* Free but don't flush the EFlags and tmp registers. */
2883 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2884 iemNativeRegFreeTmp(pReNative, idxEflReg);
2885
2886 /* Make a copy of the core state now as we start the if-block. */
2887 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
2888
2889 return off;
2890}
2891
2892
2893#define IEM_MC_IF_CX_IS_NZ() \
2894 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
2895 do {
2896
2897/** Emits code for IEM_MC_IF_CX_IS_NZ. */
2898DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2899{
2900 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2901
2902 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2903 kIemNativeGstRegUse_ReadOnly);
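    /* Only the low 16 bits (CX) matter here, hence the UINT16_MAX mask. */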
2904 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
2905 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2906
2907 iemNativeCondStartIfBlock(pReNative, off);
2908 return off;
2909}
2910
2911
2912#define IEM_MC_IF_ECX_IS_NZ() \
2913 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
2914 do {
2915
2916#define IEM_MC_IF_RCX_IS_NZ() \
2917 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
2918 do {
2919
2920/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
2921DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
2922{
2923 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2924
2925 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2926 kIemNativeGstRegUse_ReadOnly);
2927 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
2928 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2929
2930 iemNativeCondStartIfBlock(pReNative, off);
2931 return off;
2932}
2933
2934
2935#define IEM_MC_IF_CX_IS_NOT_ONE() \
2936 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
2937 do {
2938
2939/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
2940DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2941{
2942 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2943
2944 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2945 kIemNativeGstRegUse_ReadOnly);
2946#ifdef RT_ARCH_AMD64
2947 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2948#else
2949 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2950 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
2951 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2952#endif
2953 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2954
2955 iemNativeCondStartIfBlock(pReNative, off);
2956 return off;
2957}
2958
2959
2960#define IEM_MC_IF_ECX_IS_NOT_ONE() \
2961 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
2962 do {
2963
2964#define IEM_MC_IF_RCX_IS_NOT_ONE() \
2965 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
2966 do {
2967
2968/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
2969DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
2970{
2971 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2972
2973 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2974 kIemNativeGstRegUse_ReadOnly);
2975 if (f64Bit)
2976 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2977 else
2978 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2979 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2980
2981 iemNativeCondStartIfBlock(pReNative, off);
2982 return off;
2983}
2984
2985
2986#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
2987 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
2988 do {
2989
2990#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
2991 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
2992 do {
2993
2994/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
2995 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
2996DECL_INLINE_THROW(uint32_t)
2997iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
2998{
2999 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3000 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3001
3002 /* We have to load both RCX and EFLAGS before we can start branching,
3003 otherwise we'll end up in the else-block with an inconsistent
3004 register allocator state.
3005 Doing EFLAGS first as it's more likely to be loaded, right? */
3006 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3007 kIemNativeGstRegUse_ReadOnly);
3008 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3009 kIemNativeGstRegUse_ReadOnly);
3010
3011 /** @todo we could reduce this to a single branch instruction by spending a
3012 * temporary register and some setnz stuff. Not sure if loops are
3013 * worth it. */
3014 /* Check CX. */
3015#ifdef RT_ARCH_AMD64
3016 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3017#else
3018 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3019 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3020 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3021#endif
3022
3023 /* Check the EFlags bit. */
3024 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3025 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3026 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3027 !fCheckIfSet /*fJmpIfSet*/);
3028
3029 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3030 iemNativeRegFreeTmp(pReNative, idxEflReg);
3031
3032 iemNativeCondStartIfBlock(pReNative, off);
3033 return off;
3034}
3035
3036
3037#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3038 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
3039 do {
3040
3041#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3042 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
3043 do {
3044
3045#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3046 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
3047 do {
3048
3049#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3050 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
3051 do {
3052
3053/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3054 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3055 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3056 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3057DECL_INLINE_THROW(uint32_t)
3058iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3059 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
3060{
3061 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3062 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3063
3064 /* We have to load both RCX and EFLAGS before we can start branching,
3065 otherwise we'll end up in the else-block with an inconsistent
3066 register allocator state.
3067 Doing EFLAGS first as it's more likely to be loaded, right? */
3068 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3069 kIemNativeGstRegUse_ReadOnly);
3070 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3071 kIemNativeGstRegUse_ReadOnly);
3072
3073 /** @todo we could reduce this to a single branch instruction by spending a
3074 * temporary register and some setnz stuff. Not sure if loops are
3075 * worth it. */
3076 /* Check RCX/ECX. */
3077 if (f64Bit)
3078 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3079 else
3080 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3081
3082 /* Check the EFlags bit. */
3083 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3084 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3085 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3086 !fCheckIfSet /*fJmpIfSet*/);
3087
3088 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3089 iemNativeRegFreeTmp(pReNative, idxEflReg);
3090
3091 iemNativeCondStartIfBlock(pReNative, off);
3092 return off;
3093}
3094
3095
3096#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3097 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3098 do {
3099
3100/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3101DECL_INLINE_THROW(uint32_t)
3102iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3103{
3104 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3105
3106 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3107 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3108 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3109 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3110
3111 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3112
3113 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3114
3115 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3116
3117 iemNativeCondStartIfBlock(pReNative, off);
3118 return off;
3119}
3120
3121
3122#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3123 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3124 do {
3125
3126/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3127DECL_INLINE_THROW(uint32_t)
3128iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3129{
3130 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3131 Assert(iGReg < 16);
3132
3133 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3134 kIemNativeGstRegUse_ReadOnly);
3135
3136 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3137
3138 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3139
3140 iemNativeCondStartIfBlock(pReNative, off);
3141 return off;
3142}
3143
3144
3145
3146/*********************************************************************************************************************************
3147* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3148*********************************************************************************************************************************/
3149
3150#define IEM_MC_NOREF(a_Name) \
3151 RT_NOREF_PV(a_Name)
3152
3153#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3154 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3155
3156#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3157 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3158
3159#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3160 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3161
3162#define IEM_MC_LOCAL(a_Type, a_Name) \
3163 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3164
3165#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3166 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3167
3168#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3169 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3170
3171
3172/**
3173 * Sets the host register for @a idxVarRc to @a idxReg.
3174 *
3175 * The register must not be allocated. Any guest register shadowing will be
3176 * implicitly dropped by this call.
3177 *
3178 * The variable must not have any register associated with it (causes
3179 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3180 * implied.
3181 *
3182 * @returns idxReg
3183 * @param pReNative The recompiler state.
3184 * @param idxVar The variable.
3185 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3186 * @param off For recording in debug info.
3187 *
3188 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3189 */
3190DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
3191{
3192 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3193 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3194 Assert(!pVar->fRegAcquired);
3195 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3196 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3197 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3198
3199 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3200 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3201
3202 iemNativeVarSetKindToStack(pReNative, idxVar);
3203 pVar->idxReg = idxReg;
3204
3205 return idxReg;
3206}
3207
3208
3209/**
3210 * A convenient helper function.
3211 */
3212DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3213 uint8_t idxReg, uint32_t *poff)
3214{
3215 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
3216 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3217 return idxReg;
3218}
3219
3220
3221/**
3222 * This is called by IEM_MC_END() to clean up all variables.
3223 */
3224DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3225{
3226 uint32_t const bmVars = pReNative->Core.bmVars;
3227 if (bmVars != 0)
3228 iemNativeVarFreeAllSlow(pReNative, bmVars);
3229 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3230 Assert(pReNative->Core.bmStack == 0);
3231}
3232
3233
3234#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
3235
3236/**
3237 * This is called by IEM_MC_FREE_LOCAL.
3238 */
3239DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3240{
3241 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3242 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
3243 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3244}
3245
3246
3247#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
3248
3249/**
3250 * This is called by IEM_MC_FREE_ARG.
3251 */
3252DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3253{
3254 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3255 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
3256 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3257}
3258
3259
3260#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
3261
3262/**
3263 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
3264 */
3265DECL_INLINE_THROW(uint32_t)
3266iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
3267{
3268 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
3269 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
3270 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3271 Assert( pVarDst->cbVar == sizeof(uint16_t)
3272 || pVarDst->cbVar == sizeof(uint32_t));
3273
3274 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
3275 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
3276 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
3277 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
3278 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3279
3280 Assert(pVarDst->cbVar < pVarSrc->cbVar);
3281
3282 /*
3283 * Special case for immediates.
3284 */
3285 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
3286 {
3287 switch (pVarDst->cbVar)
3288 {
3289 case sizeof(uint16_t):
3290 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
3291 break;
3292 case sizeof(uint32_t):
3293 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
3294 break;
3295 default: AssertFailed(); break;
3296 }
3297 }
3298 else
3299 {
3300 /*
3301 * The generic solution for now.
3302 */
3303 /** @todo optimize this by having the python script make sure the source
3304 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
3305 * statement. Then we could just transfer the register assignments. */
3306 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
3307 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
3308 switch (pVarDst->cbVar)
3309 {
3310 case sizeof(uint16_t):
3311 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
3312 break;
3313 case sizeof(uint32_t):
3314 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
3315 break;
3316 default: AssertFailed(); break;
3317 }
3318 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
3319 iemNativeVarRegisterRelease(pReNative, idxVarDst);
3320 }
3321 return off;
3322}
3323
3324
3325
3326/*********************************************************************************************************************************
3327* Emitters for IEM_MC_CALL_CIMPL_XXX *
3328*********************************************************************************************************************************/
3329
3330/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
3331DECL_INLINE_THROW(uint32_t)
3332iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
3333 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
3334
3335{
3336 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
3337
3338#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3339 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
3340 when a call clobbers any of the relevant control registers. */
3341# if 1
3342 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
3343 {
3344 /* Likely as long as call+ret are done via cimpl. */
3345 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
3346 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
3347 }
3348 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
3349 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3350 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
3351 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3352 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
3353 else
3354 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3355 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3356 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3357
3358# else
3359 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
3360 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3361 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
3362 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3363 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
3364 || pfnCImpl == (uintptr_t)iemCImpl_callf
3365 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
3366 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
3367 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3368 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3369 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3370# endif
3371
3372# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3373 /* Mark the host floating point control register as not synced if MXCSR is modified. */
3374 if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_MxCsr))
3375 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
3376# endif
3377#endif
3378
3379 /*
3380 * Do all the call setup and cleanup.
3381 */
3382 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
3383
3384 /*
3385 * Load the two or three hidden arguments.
3386 */
3387#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3388 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3389 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3390 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
3391#else
3392 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3393 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
3394#endif
3395
3396 /*
3397 * Make the call and check the return code.
3398 *
3399 * Shadow PC copies are always flushed here, other stuff depends on flags.
3400 * Segment and general purpose registers are explicitly flushed via the
3401 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
3402 * macros.
3403 */
3404 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
3405#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3406 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3407#endif
3408 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
3409 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
3410 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
3411 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
3412
3413 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3414}
3415
3416
3417#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
3418 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
3419
3420/** Emits code for IEM_MC_CALL_CIMPL_1. */
3421DECL_INLINE_THROW(uint32_t)
3422iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3423 uintptr_t pfnCImpl, uint8_t idxArg0)
3424{
3425 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3426 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
3427}
3428
3429
3430#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
3431 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
3432
3433/** Emits code for IEM_MC_CALL_CIMPL_2. */
3434DECL_INLINE_THROW(uint32_t)
3435iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3436 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
3437{
3438 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3439 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3440 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
3441}
3442
3443
3444#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
3445 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3446 (uintptr_t)a_pfnCImpl, a0, a1, a2)
3447
3448/** Emits code for IEM_MC_CALL_CIMPL_3. */
3449DECL_INLINE_THROW(uint32_t)
3450iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3451 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3452{
3453 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3454 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3455 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3456 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
3457}
3458
3459
3460#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
3461 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3462 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
3463
3464/** Emits code for IEM_MC_CALL_CIMPL_4. */
3465DECL_INLINE_THROW(uint32_t)
3466iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3467 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3468{
3469 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3470 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3471 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3472 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3473 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
3474}
3475
3476
3477#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
3478 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3479 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
3480
3481/** Emits code for IEM_MC_CALL_CIMPL_5. */
3482DECL_INLINE_THROW(uint32_t)
3483iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3484 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
3485{
3486 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3487 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3488 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3489 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3490 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
3491 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
3492}
3493
3494
3495/** Recompiler debugging: Flush guest register shadow copies. */
3496#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
3497
3498
3499
3500/*********************************************************************************************************************************
3501* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
3502*********************************************************************************************************************************/
3503
3504/**
3505 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
3506 */
3507DECL_INLINE_THROW(uint32_t)
3508iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3509 uintptr_t pfnAImpl, uint8_t cArgs)
3510{
3511 if (idxVarRc != UINT8_MAX)
3512 {
3513 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
3514 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
3515 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3516 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3517 }
3518
3519 /*
3520 * Do all the call setup and cleanup.
3521 *
3522 * It is only required to flush pending guest register writes in call volatile registers as
3523 * assembly helpers can't throw and don't access anything living in CPUMCTX; they only
3524 * access their parameters. The flushing of call volatile registers is always done in iemNativeEmitCallCommon()
3525 * no matter the fFlushPendingWrites parameter.
3526 */
3527 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
3528
3529 /*
3530 * Make the call and update the return code variable if we've got one.
3531 */
3532 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
3533 if (idxVarRc != UINT8_MAX)
3534 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
3535
3536 return off;
3537}
3538
3539
3540
3541#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
3542 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
3543
3544#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
3545 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
3546
3547/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
3548DECL_INLINE_THROW(uint32_t)
3549iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
3550{
3551 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
3552}
3553
3554
3555#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
3556 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
3557
3558#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
3559 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
3560
3561/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
3562DECL_INLINE_THROW(uint32_t)
3563iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
3564{
3565 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3566 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
3567}
3568
3569
3570#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
3571 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
3572
3573#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
3574 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
3575
3576/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
3577DECL_INLINE_THROW(uint32_t)
3578iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3579 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
3580{
3581 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3582 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3583 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
3584}
3585
3586
3587#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
3588 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
3589
3590#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
3591 IEM_MC_LOCAL(a_rcType, a_rc); \
3592 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
3593
3594/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
3595DECL_INLINE_THROW(uint32_t)
3596iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3597 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3598{
3599 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3600 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3601 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3602 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
3603}
3604
3605
3606#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
3607 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3608
3609#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
3610 IEM_MC_LOCAL(a_rcType, a_rc); \
3611 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3612
3613/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
3614DECL_INLINE_THROW(uint32_t)
3615iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3616 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3617{
3618 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3619 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3620 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3621 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
3622 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
3623}
3624
3625
3626
3627/*********************************************************************************************************************************
3628* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
3629*********************************************************************************************************************************/
3630
3631#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
3632 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
3633
3634#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3635 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
3636
3637#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3638 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
3639
3640#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3641 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
3642
3643
3644/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
3645 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
3646DECL_INLINE_THROW(uint32_t)
3647iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
3648{
3649 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3650 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3651 Assert(iGRegEx < 20);
3652
3653 /* Same discussion as in iemNativeEmitFetchGregU16 */
3654 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3655 kIemNativeGstRegUse_ReadOnly);
3656
3657 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3658 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3659
3660 /* The value is zero-extended to the full 64-bit host register width. */
3661 if (iGRegEx < 16)
3662 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3663 else
3664 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3665
3666 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3667 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3668 return off;
3669}
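/** @note For illustration: the iGRegEx convention used by the byte fetches
 *        above appears to be that values 0..15 select the low byte of
 *        RAX..R15, while 16..19 select the legacy high-byte registers
 *        (AH, CH, DH, BH) - hence the iGRegEx & 15 masking and the Gpr8Hi
 *        load in the second branch.  E.g. iGRegEx = 1 fetches CL, whereas
 *        iGRegEx = 16 + 1 fetches CH (bits 15:8 of RCX).
 */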
3670
3671
3672#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3673 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
3674
3675#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3676 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
3677
3678#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3679 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
3680
3681/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
3682DECL_INLINE_THROW(uint32_t)
3683iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
3684{
3685 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3686 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3687 Assert(iGRegEx < 20);
3688
3689 /* Same discussion as in iemNativeEmitFetchGregU16 */
3690 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3691 kIemNativeGstRegUse_ReadOnly);
3692
3693 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3694 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3695
3696 if (iGRegEx < 16)
3697 {
3698 switch (cbSignExtended)
3699 {
3700 case sizeof(uint16_t):
3701 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3702 break;
3703 case sizeof(uint32_t):
3704 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3705 break;
3706 case sizeof(uint64_t):
3707 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3708 break;
3709 default: AssertFailed(); break;
3710 }
3711 }
3712 else
3713 {
3714 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3715 switch (cbSignExtended)
3716 {
3717 case sizeof(uint16_t):
3718 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3719 break;
3720 case sizeof(uint32_t):
3721 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3722 break;
3723 case sizeof(uint64_t):
3724 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3725 break;
3726 default: AssertFailed(); break;
3727 }
3728 }
3729
3730 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3731 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3732 return off;
3733}
3734
3735
3736
3737#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
3738 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
3739
3740#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
3741 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3742
3743#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
3744 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3745
3746/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
3747DECL_INLINE_THROW(uint32_t)
3748iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3749{
3750 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3751 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3752 Assert(iGReg < 16);
3753
3754 /*
3755 * We can either just load the low 16-bit of the GPR into a host register
3756 * for the variable, or we can do so via a shadow copy host register. The
3757 * latter will avoid having to reload it if it's being stored later, but
3758 * will waste a host register if it isn't touched again. Since we don't
3759 * know what's going to happen, we choose the latter for now.
3760 */
3761 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3762 kIemNativeGstRegUse_ReadOnly);
3763
3764 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3765 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3766 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3767 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3768
3769 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3770 return off;
3771}
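/** @note A small, hypothetical MC sequence illustrating the shadow copy
 *        trade-off discussed in iemNativeEmitFetchGregU16 above:
 *        @code
 *            IEM_MC_LOCAL(uint16_t, u16Value);
 *            IEM_MC_FETCH_GREG_U16(u16Value, X86_GREG_xAX);
 *            /* ... u16Value gets modified here ... */
 *            IEM_MC_STORE_GREG_U16(X86_GREG_xAX, u16Value);
 *        @endcode
 *        Keeping the full RAX value shadowed in a host register means the
 *        later 16-bit store does not have to reload the guest register from
 *        CPUMCTX before merging in the new low word; the downside is that
 *        the shadow register stays allocated even if RAX is never touched
 *        again in the block.
 */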
3772
3773#define IEM_MC_FETCH_GREG_I16(a_i16Dst, a_iGReg) \
3774 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_i16Dst, a_iGReg, sizeof(uint32_t)) /* Note! ON ARM we use 32-bit registers for 16-bit. */
3775
3776#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
3777 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3778
3779#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
3780 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3781
3782/** Emits code for IEM_MC_FETCH_GREG_I16 and IEM_MC_FETCH_GREG_U16_SX_U32/64. */
3783DECL_INLINE_THROW(uint32_t)
3784iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
3785{
3786 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3787 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3788 Assert(iGReg < 16);
3789
3790 /*
3791 * We can either just load the low 16-bit of the GPR into a host register
3792 * for the variable, or we can do so via a shadow copy host register. The
3793 * latter will avoid having to reload it if it's being stored later, but
3794 * will waste a host register if it isn't touched again. Since we don't
3795 * know what's going to happen, we choose the latter for now.
3796 */
3797 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3798 kIemNativeGstRegUse_ReadOnly);
3799
3800 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3801 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3802 if (cbSignExtended == sizeof(uint32_t))
3803 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3804 else
3805 {
3806 Assert(cbSignExtended == sizeof(uint64_t));
3807 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3808 }
3809 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3810
3811 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3812 return off;
3813}
3814
3815
3816#define IEM_MC_FETCH_GREG_I32(a_i32Dst, a_iGReg) \
3817 off = iemNativeEmitFetchGregU32(pReNative, off, a_i32Dst, a_iGReg, sizeof(uint32_t))
3818
3819#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
3820 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
3821
3822#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
3823 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
3824
3825/** Emits code for IEM_MC_FETCH_GREG_U32, IEM_MC_FETCH_GREG_I32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
3826DECL_INLINE_THROW(uint32_t)
3827iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3828{
3829 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3830 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3831 Assert(iGReg < 16);
3832
3833 /*
3834 * We can either just load the low 32-bit of the GPR into a host register
3835 * for the variable, or we can do so via a shadow copy host register. The
3836 * latter will avoid having to reload it if it's being stored later, but
3837 * will waste a host register if it isn't touched again. Since we don't
3838 * know what's going to happen, we choose the latter for now.
3839 */
3840 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3841 kIemNativeGstRegUse_ReadOnly);
3842
3843 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3844 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3845 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3846 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3847
3848 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3849 return off;
3850}
3851
3852
3853#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
3854 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
3855
3856/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
3857DECL_INLINE_THROW(uint32_t)
3858iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3859{
3860 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3861 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
3862 Assert(iGReg < 16);
3863
3864 /*
3865 * We can either just load the low 32-bit of the GPR into a host register
3866 * for the variable, or we can do so via a shadow copy host register. The
3867 * latter will avoid having to reload it if it's being stored later, but
3868 * will waste a host register if it isn't touched again. Since we don't
3869 * know what's going to happen, we choose the latter for now.
3870 */
3871 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3872 kIemNativeGstRegUse_ReadOnly);
3873
3874 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3875 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3876 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3877 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3878
3879 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3880 return off;
3881}
3882
3883
3884#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
3885 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
3886
3887#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
3888 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
3889
3890/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
3891 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
3892DECL_INLINE_THROW(uint32_t)
3893iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3894{
3895 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3896 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
3897 Assert(iGReg < 16);
3898
3899 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3900 kIemNativeGstRegUse_ReadOnly);
3901
3902 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3903 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3904 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
3905 /** @todo name the register a shadow one already? */
3906 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3907
3908 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3909 return off;
3910}
3911
3912
3913#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3914#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
3915 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
3916
3917/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
3918DECL_INLINE_THROW(uint32_t)
3919iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
3920{
3921 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3922 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
3923 Assert(iGRegLo < 16 && iGRegHi < 16);
3924
3925 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
3926 kIemNativeGstRegUse_ReadOnly);
3927 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
3928 kIemNativeGstRegUse_ReadOnly);
3929
3930 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3931 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
3932 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
3933 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
3934
3935 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
3936 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
3937 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
3938 return off;
3939}
3940#endif
3941
3942
3943/*********************************************************************************************************************************
3944* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
3945*********************************************************************************************************************************/
3946
3947#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
3948 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
3949
3950/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
3951DECL_INLINE_THROW(uint32_t)
3952iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
3953{
3954 Assert(iGRegEx < 20);
3955 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3956 kIemNativeGstRegUse_ForUpdate);
3957#ifdef RT_ARCH_AMD64
3958 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
3959
3960 /* To the lowest byte of the register: mov r8, imm8 */
3961 if (iGRegEx < 16)
3962 {
3963 if (idxGstTmpReg >= 8)
3964 pbCodeBuf[off++] = X86_OP_REX_B;
3965 else if (idxGstTmpReg >= 4)
3966 pbCodeBuf[off++] = X86_OP_REX;
3967 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
3968 pbCodeBuf[off++] = u8Value;
3969 }
3970 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
3971 else if (idxGstTmpReg < 4)
3972 {
3973 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
3974 pbCodeBuf[off++] = u8Value;
3975 }
3976 else
3977 {
3978 /* ror reg64, 8 */
3979 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
3980 pbCodeBuf[off++] = 0xc1;
3981 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3982 pbCodeBuf[off++] = 8;
3983
3984 /* mov reg8, imm8 */
3985 if (idxGstTmpReg >= 8)
3986 pbCodeBuf[off++] = X86_OP_REX_B;
3987 else if (idxGstTmpReg >= 4)
3988 pbCodeBuf[off++] = X86_OP_REX;
3989 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
3990 pbCodeBuf[off++] = u8Value;
3991
3992 /* rol reg64, 8 */
3993 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
3994 pbCodeBuf[off++] = 0xc1;
3995 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3996 pbCodeBuf[off++] = 8;
3997 }
3998
3999#elif defined(RT_ARCH_ARM64)
4000 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4001 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4002 if (iGRegEx < 16)
4003 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4004 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4005 else
4006 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4007 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4008 iemNativeRegFreeTmp(pReNative, idxImmReg);
4009
4010#else
4011# error "Port me!"
4012#endif
4013
4014 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4015
4016#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4017 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4018#endif
4019
4020 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4021 return off;
4022}
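/** @note Worked example of the rotate trick above, assuming the guest
 *        register happens to be shadowed in host register r8 and the target
 *        is its high byte (e.g. storing to CH with RCX kept in r8):
 *            49 C1 C8 08     ror r8, 8      ; bring bits 15:8 down to 7:0
 *            41 B0 xx        mov r8b, imm8  ; write the constant
 *            49 C1 C0 08     rol r8, 8      ; restore the original layout
 *        The byte values follow directly from the emitter logic above; 'xx'
 *        stands for whatever u8Value happens to be.
 */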
4023
4024
4025#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4026 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4027
4028/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4029DECL_INLINE_THROW(uint32_t)
4030iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4031{
4032 Assert(iGRegEx < 20);
4033 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4034
4035 /*
4036 * If it's a constant value (unlikely) we treat this as an
4037 * IEM_MC_STORE_GREG_U8_CONST statement.
4038 */
4039 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4040 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4041 { /* likely */ }
4042 else
4043 {
4044 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4045 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4046 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4047 }
4048
4049 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4050 kIemNativeGstRegUse_ForUpdate);
4051 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4052
4053#ifdef RT_ARCH_AMD64
4054 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4055 if (iGRegEx < 16)
4056 {
4057 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4058 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4059 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4060 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4061 pbCodeBuf[off++] = X86_OP_REX;
4062 pbCodeBuf[off++] = 0x8a;
4063 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4064 }
4065 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
4066 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4067 {
4068 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4069 pbCodeBuf[off++] = 0x8a;
4070 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4071 }
4072 else
4073 {
4074 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4075
4076 /* ror reg64, 8 */
4077 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4078 pbCodeBuf[off++] = 0xc1;
4079 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4080 pbCodeBuf[off++] = 8;
4081
4082 /* mov reg8, reg8(r/m) */
4083 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4084 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4085 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4086 pbCodeBuf[off++] = X86_OP_REX;
4087 pbCodeBuf[off++] = 0x8a;
4088 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4089
4090 /* rol reg64, 8 */
4091 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4092 pbCodeBuf[off++] = 0xc1;
4093 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4094 pbCodeBuf[off++] = 8;
4095 }
4096
4097#elif defined(RT_ARCH_ARM64)
4098 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4099 or
4100 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4101 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4102 if (iGRegEx < 16)
4103 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4104 else
4105 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4106
4107#else
4108# error "Port me!"
4109#endif
4110 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4111
4112 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4113
4114#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4115 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4116#endif
4117 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4118 return off;
4119}
4120
4121
4122
4123#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4124 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4125
4126/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4127DECL_INLINE_THROW(uint32_t)
4128iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4129{
4130 Assert(iGReg < 16);
4131 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4132 kIemNativeGstRegUse_ForUpdate);
4133#ifdef RT_ARCH_AMD64
4134 /* mov reg16, imm16 */
4135 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4136 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4137 if (idxGstTmpReg >= 8)
4138 pbCodeBuf[off++] = X86_OP_REX_B;
4139 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4140 pbCodeBuf[off++] = RT_BYTE1(uValue);
4141 pbCodeBuf[off++] = RT_BYTE2(uValue);
4142
4143#elif defined(RT_ARCH_ARM64)
4144 /* movk xdst, #uValue, lsl #0 */
4145 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4146 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4147
4148#else
4149# error "Port me!"
4150#endif
4151
4152 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4153
4154#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4155 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4156#endif
4157 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4158 return off;
4159}
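/** @note For instance, storing the constant 0x1234 to a guest register that
 *        happens to be shadowed in host register r9 would, per the logic
 *        above, come out roughly as:
 *            AMD64:  66 41 B9 34 12     mov r9w, 0x1234
 *            ARM64:  movk x9, #0x1234   ; replaces bits 15:0 only
 *        Either way only the low 16 bits of the host register change, which
 *        matches the x86 semantics of a 16-bit GPR write.
 */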
4160
4161
4162#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4163 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4164
4165/** Emits code for IEM_MC_STORE_GREG_U16. */
4166DECL_INLINE_THROW(uint32_t)
4167iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4168{
4169 Assert(iGReg < 16);
4170 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4171
4172 /*
4173 * If it's a constant value (unlikely) we treat this as an
4174 * IEM_MC_STORE_GREG_U16_CONST statement.
4175 */
4176 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4177 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4178 { /* likely */ }
4179 else
4180 {
4181 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4182 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4183 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4184 }
4185
4186 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4187 kIemNativeGstRegUse_ForUpdate);
4188
4189#ifdef RT_ARCH_AMD64
4190 /* mov reg16, reg16 or [mem16] */
4191 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4192 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4193 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4194 {
4195 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4196 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
4197 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
4198 pbCodeBuf[off++] = 0x8b;
4199 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
4200 }
4201 else
4202 {
4203 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
4204 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
4205 if (idxGstTmpReg >= 8)
4206 pbCodeBuf[off++] = X86_OP_REX_R;
4207 pbCodeBuf[off++] = 0x8b;
4208 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
4209 }
4210
4211#elif defined(RT_ARCH_ARM64)
4212 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
4213 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4214 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4215 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
4216 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4217
4218#else
4219# error "Port me!"
4220#endif
4221
4222 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4223
4224#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4225 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4226#endif
4227 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4228 return off;
4229}
4230
4231
4232#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
4233 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
4234
4235/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
4236DECL_INLINE_THROW(uint32_t)
4237iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
4238{
4239 Assert(iGReg < 16);
4240 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4241 kIemNativeGstRegUse_ForFullWrite);
4242 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4243#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4244 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4245#endif
4246 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4247 return off;
4248}
4249
4250
4251#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
4252 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
4253
4254#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
4255 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
4256
4257/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
4258DECL_INLINE_THROW(uint32_t)
4259iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4260{
4261 Assert(iGReg < 16);
4262 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4263
4264 /*
4265 * If it's a constant value (unlikely) we treat this as an
4266 * IEM_MC_STORE_GREG_U32_CONST statement.
4267 */
4268 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4269 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4270 { /* likely */ }
4271 else
4272 {
4273 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4274 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4275 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
4276 }
4277
4278 /*
4279 * For the rest we allocate a guest register for the variable and write
4280 * it to the CPUMCTX structure.
4281 */
4282 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4283#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4284 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4285#else
4286 RT_NOREF(idxVarReg);
4287#endif
4288#ifdef VBOX_STRICT
4289 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
4290#endif
4291 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4292 return off;
4293}
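/** @note Unlike the 8-bit and 16-bit stores above, a 32-bit GPR store
 *        follows the usual x86-64 rule of zero-extending into bits 63:32.
 *        That is why the variable's host register can simply become the new
 *        full-width shadow here and why strict builds assert that its top 32
 *        bits are clear.  E.g. storing 0x80000000 to EAX leaves RAX as
 *        0x0000000080000000.
 */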
4294
4295
4296#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
4297 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
4298
4299/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
4300DECL_INLINE_THROW(uint32_t)
4301iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
4302{
4303 Assert(iGReg < 16);
4304 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4305 kIemNativeGstRegUse_ForFullWrite);
4306 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4307#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4308 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4309#endif
4310 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4311 return off;
4312}
4313
4314
4315#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
4316 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
4317
4318#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
4319 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
4320
4321/** Emits code for IEM_MC_STORE_GREG_U64 and IEM_MC_STORE_GREG_I64. */
4322DECL_INLINE_THROW(uint32_t)
4323iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4324{
4325 Assert(iGReg < 16);
4326 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4327
4328 /*
4329 * If it's a constant value (unlikely) we treat this as an
4330 * IEM_MC_STORE_GREG_U64_CONST statement.
4331 */
4332 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4333 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4334 { /* likely */ }
4335 else
4336 {
4337 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4338 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4339 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
4340 }
4341
4342 /*
4343 * For the rest we allocate a guest register for the variable and write
4344 * it to the CPUMCTX structure.
4345 */
4346 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4347#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4348 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4349#else
4350 RT_NOREF(idxVarReg);
4351#endif
4352 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4353 return off;
4354}
4355
4356
4357#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
4358 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
4359
4360/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
4361DECL_INLINE_THROW(uint32_t)
4362iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
4363{
4364 Assert(iGReg < 16);
4365 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4366 kIemNativeGstRegUse_ForUpdate);
4367 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
4368#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4369 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4370#endif
4371 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4372 return off;
4373}
4374
4375
4376#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4377#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
4378 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
4379
4380/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
4381DECL_INLINE_THROW(uint32_t)
4382iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
4383{
4384 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4385 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4386 Assert(iGRegLo < 16 && iGRegHi < 16);
4387
4388 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4389 kIemNativeGstRegUse_ForFullWrite);
4390 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4391 kIemNativeGstRegUse_ForFullWrite);
4392
4393 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4394 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
4395 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
4396 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
4397
4398 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4399 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4400 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4401 return off;
4402}
4403#endif
4404
4405
4406/*********************************************************************************************************************************
4407* General purpose register manipulation (add, sub). *
4408*********************************************************************************************************************************/
4409
4410#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
4411 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
4412
4413/** Emits code for IEM_MC_ADD_GREG_U16. */
4414DECL_INLINE_THROW(uint32_t)
4415iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
4416{
4417 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4418 kIemNativeGstRegUse_ForUpdate);
4419
4420#ifdef RT_ARCH_AMD64
4421 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4422 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4423 if (idxGstTmpReg >= 8)
4424 pbCodeBuf[off++] = X86_OP_REX_B;
4425 if (uAddend == 1)
4426 {
4427 pbCodeBuf[off++] = 0xff; /* inc */
4428 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4429 }
4430 else
4431 {
4432 pbCodeBuf[off++] = 0x81;
4433 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4434 pbCodeBuf[off++] = uAddend;
4435 pbCodeBuf[off++] = 0;
4436 }
4437
4438#else
4439 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4440 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4441
4442 /* add tmp, gstgrp, uAddend */
4443 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
4444
4445 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
4446 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4447
4448 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4449#endif
4450
4451 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4452
4453#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4454 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4455#endif
4456
4457 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4458 return off;
4459}
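/** @note The ARM64 path above needs the temporary because the 16-bit add
 *        must not disturb bits 63:16 of the guest register.  Rough example:
 *        with CX = 0xffff and an addend of 1, the plain 32-bit add yields
 *        0x10000 in the temporary; the BFI then copies only bits 15:0 back,
 *        so CX wraps to 0x0000 while the rest of RCX is preserved - the same
 *        effect the 0x66-prefixed inc/add has on AMD64.
 */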
4460
4461
4462#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
4463 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4464
4465#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
4466 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4467
4468/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
4469DECL_INLINE_THROW(uint32_t)
4470iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
4471{
4472 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4473 kIemNativeGstRegUse_ForUpdate);
4474
4475#ifdef RT_ARCH_AMD64
4476 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4477 if (f64Bit)
4478 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4479 else if (idxGstTmpReg >= 8)
4480 pbCodeBuf[off++] = X86_OP_REX_B;
4481 if (uAddend == 1)
4482 {
4483 pbCodeBuf[off++] = 0xff; /* inc */
4484 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4485 }
4486 else if (uAddend < 128)
4487 {
4488 pbCodeBuf[off++] = 0x83; /* add */
4489 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4490 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4491 }
4492 else
4493 {
4494 pbCodeBuf[off++] = 0x81; /* add */
4495 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4496 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4497 pbCodeBuf[off++] = 0;
4498 pbCodeBuf[off++] = 0;
4499 pbCodeBuf[off++] = 0;
4500 }
4501
4502#else
4503 /* add gstgrp, gstgrp, uAddend */
4504 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4505 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
4506
4507#endif
4508
4509 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4510
4511#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4512 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4513#endif
4514
4515 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4516 return off;
4517}
4518
4519
4520
4521#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
4522 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
4523
4524/** Emits code for IEM_MC_SUB_GREG_U16. */
4525DECL_INLINE_THROW(uint32_t)
4526iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
4527{
4528 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4529 kIemNativeGstRegUse_ForUpdate);
4530
4531#ifdef RT_ARCH_AMD64
4532 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4533 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4534 if (idxGstTmpReg >= 8)
4535 pbCodeBuf[off++] = X86_OP_REX_B;
4536 if (uSubtrahend == 1)
4537 {
4538 pbCodeBuf[off++] = 0xff; /* dec */
4539 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4540 }
4541 else
4542 {
4543 pbCodeBuf[off++] = 0x81;
4544 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4545 pbCodeBuf[off++] = uSubtrahend;
4546 pbCodeBuf[off++] = 0;
4547 }
4548
4549#else
4550 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4551 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4552
4553 /* sub tmp, gstgrp, uSubtrahend */
4554 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
4555
4556 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
4557 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4558
4559 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4560#endif
4561
4562 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4563
4564#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4565 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4566#endif
4567
4568 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4569 return off;
4570}
4571
4572
4573#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
4574 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4575
4576#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
4577 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4578
4579/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
4580DECL_INLINE_THROW(uint32_t)
4581iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
4582{
4583 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4584 kIemNativeGstRegUse_ForUpdate);
4585
4586#ifdef RT_ARCH_AMD64
4587 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4588 if (f64Bit)
4589 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4590 else if (idxGstTmpReg >= 8)
4591 pbCodeBuf[off++] = X86_OP_REX_B;
4592 if (uSubtrahend == 1)
4593 {
4594 pbCodeBuf[off++] = 0xff; /* dec */
4595 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4596 }
4597 else if (uSubtrahend < 128)
4598 {
4599 pbCodeBuf[off++] = 0x83; /* sub */
4600 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4601 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4602 }
4603 else
4604 {
4605 pbCodeBuf[off++] = 0x81; /* sub */
4606 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4607 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4608 pbCodeBuf[off++] = 0;
4609 pbCodeBuf[off++] = 0;
4610 pbCodeBuf[off++] = 0;
4611 }
4612
4613#else
4614 /* sub tmp, gstgrp, uSubtrahend */
4615 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4616 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
4617
4618#endif
4619
4620 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4621
4622#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4623 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4624#endif
4625
4626 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4627 return off;
4628}
4629
4630
4631#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
4632 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4633
4634#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
4635 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4636
4637#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
4638 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4639
4640#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
4641 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4642
4643/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
4644DECL_INLINE_THROW(uint32_t)
4645iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4646{
4647#ifdef VBOX_STRICT
4648 switch (cbMask)
4649 {
4650 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4651 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4652 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4653 case sizeof(uint64_t): break;
4654 default: AssertFailedBreak();
4655 }
4656#endif
4657
4658 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4659 kIemNativeGstRegUse_ForUpdate);
4660
4661 switch (cbMask)
4662 {
4663 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4664 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
4665 break;
4666 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
4667 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
4668 break;
4669 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4670 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4671 break;
4672 case sizeof(uint64_t):
4673 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
4674 break;
4675 default: AssertFailedBreak();
4676 }
4677
4678 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4679
4680#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4681 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4682#endif
4683
4684 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4685 return off;
4686}
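/** @note The sub-32-bit cases widen the mask so that the AND behaves like a
 *        partial-register operation.  For example, IEM_MC_AND_GREG_U8 with a
 *        mask of 0x0f becomes a 64-bit AND with 0xffffffffffffff0f, clearing
 *        only bits 7:4 of the guest register, whereas the 32-bit case
 *        deliberately uses the 32-bit AND so that bits 63:32 end up zero, as
 *        x86-64 semantics require.
 */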
4687
4688
4689#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
4690 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4691
4692#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
4693 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4694
4695#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
4696 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4697
4698#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
4699 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4700
4701/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
4702DECL_INLINE_THROW(uint32_t)
4703iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4704{
4705#ifdef VBOX_STRICT
4706 switch (cbMask)
4707 {
4708 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4709 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4710 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4711 case sizeof(uint64_t): break;
4712 default: AssertFailedBreak();
4713 }
4714#endif
4715
4716 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4717 kIemNativeGstRegUse_ForUpdate);
4718
4719 switch (cbMask)
4720 {
4721 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4722 case sizeof(uint16_t):
4723 case sizeof(uint64_t):
4724 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
4725 break;
4726 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4727 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4728 break;
4729 default: AssertFailedBreak();
4730 }
4731
4732 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4733
4734#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4735 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4736#endif
4737
4738 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4739 return off;
4740}
4741
4742
4743/*********************************************************************************************************************************
4744* Local/Argument variable manipulation (add, sub, and, or). *
4745*********************************************************************************************************************************/
4746
4747#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
4748 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4749
4750#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
4751 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4752
4753#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
4754 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4755
4756#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
4757 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4758
4759
4760#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
4761 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
4762
4763#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
4764 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
4765
4766#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
4767 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
4768
4769/** Emits code for AND'ing a local and a constant value. */
4770DECL_INLINE_THROW(uint32_t)
4771iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4772{
4773#ifdef VBOX_STRICT
4774 switch (cbMask)
4775 {
4776 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4777 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4778 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4779 case sizeof(uint64_t): break;
4780 default: AssertFailedBreak();
4781 }
4782#endif
4783
4784 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4785 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4786
4787 if (cbMask <= sizeof(uint32_t))
4788 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
4789 else
4790 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
4791
4792 iemNativeVarRegisterRelease(pReNative, idxVar);
4793 return off;
4794}
4795
4796
4797#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
4798 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4799
4800#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
4801 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4802
4803#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
4804 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4805
4806#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
4807 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4808
4809/** Emits code for OR'ing a local and a constant value. */
4810DECL_INLINE_THROW(uint32_t)
4811iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4812{
4813#ifdef VBOX_STRICT
4814 switch (cbMask)
4815 {
4816 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4817 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4818 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4819 case sizeof(uint64_t): break;
4820 default: AssertFailedBreak();
4821 }
4822#endif
4823
4824 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4825 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4826
4827 if (cbMask <= sizeof(uint32_t))
4828 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
4829 else
4830 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
4831
4832 iemNativeVarRegisterRelease(pReNative, idxVar);
4833 return off;
4834}
4835
4836
4837#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
4838 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
4839
4840#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
4841 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
4842
4843#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
4844 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
4845
4846/** Emits code for reversing the byte order in a local value. */
4847DECL_INLINE_THROW(uint32_t)
4848iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
4849{
4850 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4851 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4852
4853 switch (cbLocal)
4854 {
4855 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
4856 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
4857 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
4858 default: AssertFailedBreak();
4859 }
4860
4861 iemNativeVarRegisterRelease(pReNative, idxVar);
4862 return off;
4863}
4864
4865
4866#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
4867 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
4868
4869#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
4870 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
4871
4872#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
4873 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
4874
4875/** Emits code for shifting left a local value. */
4876DECL_INLINE_THROW(uint32_t)
4877iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
4878{
4879#ifdef VBOX_STRICT
4880 switch (cbLocal)
4881 {
4882 case sizeof(uint8_t): Assert(cShift < 8); break;
4883 case sizeof(uint16_t): Assert(cShift < 16); break;
4884 case sizeof(uint32_t): Assert(cShift < 32); break;
4885 case sizeof(uint64_t): Assert(cShift < 64); break;
4886 default: AssertFailedBreak();
4887 }
4888#endif
4889
4890 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4891 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4892
4893 if (cbLocal <= sizeof(uint32_t))
4894 {
4895 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
4896 if (cbLocal < sizeof(uint32_t))
4897 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
4898 cbLocal == sizeof(uint16_t)
4899 ? UINT32_C(0xffff)
4900 : UINT32_C(0xff));
4901 }
4902 else
4903 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
4904
4905 iemNativeVarRegisterRelease(pReNative, idxVar);
4906 return off;
4907}
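/** @note For 8-bit and 16-bit locals the shift is performed in a 32-bit host
 *        register and the result masked back down afterwards.  Assuming the
 *        host register holds the 16-bit value zero extended, e.g. 0x8001,
 *        IEM_MC_SHL_LOCAL_S16 with a shift count of 1 first produces
 *        0x00010002 and the AND with 0xffff then truncates it to 0x0002,
 *        which is what a true 16-bit shift would have produced.
 */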
4908
4909
4910#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
4911 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
4912
4913#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
4914 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
4915
4916#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
4917 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
4918
4919/** Emits code for arithmetically shifting right a local value. */
4920DECL_INLINE_THROW(uint32_t)
4921iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
4922{
4923#ifdef VBOX_STRICT
4924 switch (cbLocal)
4925 {
4926 case sizeof(int8_t): Assert(cShift < 8); break;
4927 case sizeof(int16_t): Assert(cShift < 16); break;
4928 case sizeof(int32_t): Assert(cShift < 32); break;
4929 case sizeof(int64_t): Assert(cShift < 64); break;
4930 default: AssertFailedBreak();
4931 }
4932#endif
4933
4934 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4935 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4936
4937 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
4938 if (cbLocal == sizeof(uint8_t))
4939 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4940 else if (cbLocal == sizeof(uint16_t))
4941 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
4942
4943 if (cbLocal <= sizeof(uint32_t))
4944 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
4945 else
4946 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
4947
4948 iemNativeVarRegisterRelease(pReNative, idxVar);
4949 return off;
4950}
4951
4952
4953#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
4954 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
4955
4956#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
4957 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
4958
4959#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
4960 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
4961
4962/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
4963DECL_INLINE_THROW(uint32_t)
4964iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
4965{
4966 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
4967 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
4968 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4969 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4970
4971 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4972 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
4973
4974 /* Need to sign extend the value. */
4975 if (cbLocal <= sizeof(uint32_t))
4976 {
4977/** @todo ARM64: In case of boredom, the extended add instruction can do the
4978 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
4979 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
4980
4981 switch (cbLocal)
4982 {
4983 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
4984 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
4985 default: AssertFailed();
4986 }
4987
4988 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
4989 iemNativeRegFreeTmp(pReNative, idxRegTmp);
4990 }
4991 else
4992 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
4993
4994 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
4995 iemNativeVarRegisterRelease(pReNative, idxVar);
4996 return off;
4997}
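
/*
 * Illustrative sketch of the ARM64 shortcut mentioned in the @todo above, using
 * the extended register form of ADD (no such emitter helper is used here, this
 * is only what the resulting instructions would look like):
 *      add  xEffAddr, xEffAddr, wLocal, sxth   ; 16-bit local
 *      add  xEffAddr, xEffAddr, wLocal, sxtw   ; 32-bit local
 * That would fold the sign extension into the addition and save the temporary
 * register currently allocated above.
 */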
4998
4999
5000
5001/*********************************************************************************************************************************
5002* EFLAGS *
5003*********************************************************************************************************************************/
5004
5005#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5006# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5007#else
5008# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5009 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5010
5011DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5012{
5013 if (fEflOutput)
5014 {
5015 PVMCPUCC const pVCpu = pReNative->pVCpu;
5016# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5017 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5018 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5019 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5020# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5021 if (fEflOutput & (a_fEfl)) \
5022 { \
5023 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5024 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5025 else \
5026 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5027 } else do { } while (0)
5028# else
5029 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5030 IEMLIVENESSBIT const LivenessClobbered =
5031 {
5032 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5033 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5034 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5035 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5036 };
5037 IEMLIVENESSBIT const LivenessDelayable =
5038 {
5039 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5040 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5041 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5042 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5043 };
5044# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5045 if (fEflOutput & (a_fEfl)) \
5046 { \
5047 if (LivenessClobbered.a_fLivenessMember) \
5048 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5049 else if (LivenessDelayable.a_fLivenessMember) \
5050 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5051 else \
5052 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5053 } else do { } while (0)
5054# endif
5055 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5056 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5057 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5058 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5059 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5060 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5061 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5062# undef CHECK_FLAG_AND_UPDATE_STATS
5063 }
5064 RT_NOREF(fEflInput);
5065}
5066#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5067
5068#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5069#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5070 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
5071
5072/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5073DECL_INLINE_THROW(uint32_t)
5074iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
5075 uint32_t fEflInput, uint32_t fEflOutput)
5076{
5077 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5078 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5079 RT_NOREF(fEflInput, fEflOutput);
5080
5081#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5082# ifdef VBOX_STRICT
5083 if ( pReNative->idxCurCall != 0
5084 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
5085 {
5086 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5087 uint32_t const fBoth = fEflInput | fEflOutput;
5088# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5089 AssertMsg( !(fBoth & (a_fElfConst)) \
5090 || (!(fEflInput & (a_fElfConst)) \
5091 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5092 : !(fEflOutput & (a_fElfConst)) \
5093 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5094 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5095 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5096 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5097 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5098 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5099 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5100 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5101 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5102 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5103# undef ASSERT_ONE_EFL
5104 }
5105# endif
5106#endif
5107
5108 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5109
5110    /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
5111 * the existing shadow copy. */
5112 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
5113 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5114 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5115 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5116 return off;
5117}
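
/*
 * Note: marking idxReg as shadowing kIemNativeGstReg_EFlags above means a later
 * guest register allocation for EFLAGS in the same block can, in principle, be
 * served from that host register instead of reloading cpum.GstCtx.eflags (see
 * the @todo about reusing an existing shadow copy for the load itself).
 */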
5118
5119
5120
5121/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5122 * start using it with custom native code emission (inlining assembly
5123 * instruction helpers). */
5124#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5125#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5126 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5127 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
5128
5129#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5130#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5131 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5132 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
5133
5134/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5135DECL_INLINE_THROW(uint32_t)
5136iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
5137 bool fUpdateSkipping)
5138{
5139 RT_NOREF(fEflOutput);
5140 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
5141 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5142
5143#ifdef VBOX_STRICT
5144 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5145 uint32_t offFixup = off;
5146 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5147 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5148 iemNativeFixupFixedJump(pReNative, offFixup, off);
5149
5150 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5151 offFixup = off;
5152 off = iemNativeEmitJzToFixed(pReNative, off, off);
5153 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
5154 iemNativeFixupFixedJump(pReNative, offFixup, off);
5155
5156    /** @todo validate that only bits in the fEflOutput mask changed. */
5157#endif
5158
5159#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5160 if (fUpdateSkipping)
5161 {
5162 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5163 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5164 else
5165 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5166 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5167 }
5168#else
5169 RT_NOREF_PV(fUpdateSkipping);
5170#endif
5171
5172 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5173 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
5174 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5175 return off;
5176}
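
/*
 * Example of what the strict checks above catch, assuming bit 3 is covered by
 * X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32 (it reads as zero on real CPUs):
 *      committing 0x00000044 (fixed-one bit 1 clear)          -> brk/int3 0x2001
 *      committing 0x0000004a (reserved bit 3 set)             -> brk/int3 0x2002
 *      committing 0x00000046 (PF+ZF set, bit 1 set, no reserved bits) -> passes both
 */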
5177
5178
5179typedef enum IEMNATIVEMITEFLOP
5180{
5181 kIemNativeEmitEflOp_Invalid = 0,
5182 kIemNativeEmitEflOp_Set,
5183 kIemNativeEmitEflOp_Clear,
5184 kIemNativeEmitEflOp_Flip
5185} IEMNATIVEMITEFLOP;
5186
5187#define IEM_MC_SET_EFL_BIT(a_fBit) \
5188 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Set);
5189
5190#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
5191 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Clear);
5192
5193#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
5194 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Flip);
5195
5196/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
5197DECL_INLINE_THROW(uint32_t)
5198iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
5199{
5200 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5201 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/);
5202
5203 switch (enmOp)
5204 {
5205 case kIemNativeEmitEflOp_Set:
5206 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5207 break;
5208 case kIemNativeEmitEflOp_Clear:
5209 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
5210 break;
5211 case kIemNativeEmitEflOp_Flip:
5212 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5213 break;
5214 default:
5215 AssertFailed();
5216 break;
5217 }
5218
5219 /** @todo No delayed writeback for EFLAGS right now. */
5220 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5221
5222 /* Free but don't flush the EFLAGS register. */
5223 iemNativeRegFreeTmp(pReNative, idxEflReg);
5224
5225 return off;
5226}
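
/*
 * Usage example: IEM_MC_SET_EFL_BIT(X86_EFL_CF) becomes an OR of the live
 * EFLAGS register with 0x1, IEM_MC_CLEAR_EFL_BIT(X86_EFL_CF) an AND with ~0x1,
 * and IEM_MC_FLIP_EFL_BIT(X86_EFL_CF) an XOR with 0x1, each followed by the
 * store to cpum.GstCtx.eflags emitted above.
 */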
5227
5228
5229/*********************************************************************************************************************************
5230* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
5231*********************************************************************************************************************************/
5232
5233#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
5234 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
5235
5236#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
5237 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
5238
5239#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
5240 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
5241
5242
5243/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
5244 * IEM_MC_FETCH_SREG_ZX_U64. */
5245DECL_INLINE_THROW(uint32_t)
5246iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
5247{
5248 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5249 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
5250 Assert(iSReg < X86_SREG_COUNT);
5251
5252 /*
5253     * For now, we will not create a shadow copy of a selector. The rationale
5254     * is that since we do not recompile the popping and loading of segment
5255     * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for
5256     * pushing and moving to registers, there is only a small chance that the
5257     * shadow copy will be accessed again before the register is reloaded. One
5258     * scenario would be nested calls in 16-bit code, but I doubt it's worth
5259     * the extra register pressure atm.
5260     *
5261     * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
5262     * and iemNativeVarRegisterAcquire for a load scenario. We only have the
5263     * store scenario covered at present (r160730).
5264 */
5265 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5266 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
5267 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
5268 iemNativeVarRegisterRelease(pReNative, idxDstVar);
5269 return off;
5270}
5271
5272
5273
5274/*********************************************************************************************************************************
5275* Register references. *
5276*********************************************************************************************************************************/
5277
5278#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
5279 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
5280
5281#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
5282 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
5283
5284/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
5285DECL_INLINE_THROW(uint32_t)
5286iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
5287{
5288 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
5289 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5290 Assert(iGRegEx < 20);
5291
5292 if (iGRegEx < 16)
5293 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5294 else
5295 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
5296
5297 /* If we've delayed writing back the register value, flush it now. */
5298 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5299
5300 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5301 if (!fConst)
5302 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
5303
5304 return off;
5305}
5306
5307#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
5308 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
5309
5310#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
5311 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
5312
5313#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
5314 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
5315
5316#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
5317 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
5318
5319#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
5320 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
5321
5322#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
5323 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
5324
5325#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
5326 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
5327
5328#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
5329 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
5330
5331#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
5332 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
5333
5334#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
5335 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
5336
5337/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
5338DECL_INLINE_THROW(uint32_t)
5339iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
5340{
5341 Assert(iGReg < 16);
5342 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
5343 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5344
5345 /* If we've delayed writing back the register value, flush it now. */
5346 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
5347
5348 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5349 if (!fConst)
5350 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
5351
5352 return off;
5353}
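
/*
 * Why the non-const case flushes the guest shadow: something like
 * IEM_MC_REF_GREG_U64(pu64Dst, X86_GREG_xAX) hands the called C helper a
 * pointer straight into cpum.GstCtx, so the helper may modify RAX behind the
 * register allocator's back and any host register still shadowing RAX would go
 * stale. A const reference only reads, so its shadow can be kept.
 */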
5354
5355
5356#undef IEM_MC_REF_EFLAGS /* should not be used. */
5357#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
5358 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5359 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
5360
5361/** Handles IEM_MC_REF_EFLAGS. */
5362DECL_INLINE_THROW(uint32_t)
5363iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
5364{
5365 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
5366 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5367
5368#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5369 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5370
5371 /* Updating the skipping according to the outputs is a little early, but
5372 we don't have any other hooks for references atm. */
5373 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5374 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5375 else if (fEflOutput & X86_EFL_STATUS_BITS)
5376 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5377 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5378#else
5379 RT_NOREF(fEflInput, fEflOutput);
5380#endif
5381
5382 /* If we've delayed writing back the register value, flush it now. */
5383 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
5384
5385 /* If there is a shadow copy of guest EFLAGS, flush it now. */
5386 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
5387
5388 return off;
5389}
5390
5391
5392/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
5393 * different code from threaded recompiler, maybe it would be helpful. For now
5394 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
5395#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
5396
5397
5398#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
5399 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
5400
5401#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
5402 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
5403
5404#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
5405 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
5406
5407#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
5408 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
5409
5410#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5411/* Just being paranoid here. */
5412# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
5413AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
5414AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
5415AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
5416AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
5417# endif
5418AssertCompileMemberOffset(X86XMMREG, au64, 0);
5419AssertCompileMemberOffset(X86XMMREG, au32, 0);
5420AssertCompileMemberOffset(X86XMMREG, ar64, 0);
5421AssertCompileMemberOffset(X86XMMREG, ar32, 0);
5422
5423# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
5424 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
5425# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
5426 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
5427# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
5428 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
5429# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
5430 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
5431#endif
5432
5433/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
5434DECL_INLINE_THROW(uint32_t)
5435iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
5436{
5437 Assert(iXReg < 16);
5438 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
5439 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5440
5441 /* If we've delayed writing back the register value, flush it now. */
5442 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
5443
5444#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5445 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5446 if (!fConst)
5447 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
5448#else
5449 RT_NOREF(fConst);
5450#endif
5451
5452 return off;
5453}
5454
5455
5456
5457/*********************************************************************************************************************************
5458* Effective Address Calculation *
5459*********************************************************************************************************************************/
5460#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
5461 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
5462
5463/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
5464 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
5465DECL_INLINE_THROW(uint32_t)
5466iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5467 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
5468{
5469 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5470
5471 /*
5472 * Handle the disp16 form with no registers first.
5473 *
5474 * Convert to an immediate value, as that'll delay the register allocation
5475 * and assignment till the memory access / call / whatever and we can use
5476 * a more appropriate register (or none at all).
5477 */
5478 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
5479 {
5480 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
5481 return off;
5482 }
5483
5484    /* Determine the displacement. */
5485 uint16_t u16EffAddr;
5486 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5487 {
5488 case 0: u16EffAddr = 0; break;
5489 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
5490 case 2: u16EffAddr = u16Disp; break;
5491 default: AssertFailedStmt(u16EffAddr = 0);
5492 }
5493
5494 /* Determine the registers involved. */
5495 uint8_t idxGstRegBase;
5496 uint8_t idxGstRegIndex;
5497 switch (bRm & X86_MODRM_RM_MASK)
5498 {
5499 case 0:
5500 idxGstRegBase = X86_GREG_xBX;
5501 idxGstRegIndex = X86_GREG_xSI;
5502 break;
5503 case 1:
5504 idxGstRegBase = X86_GREG_xBX;
5505 idxGstRegIndex = X86_GREG_xDI;
5506 break;
5507 case 2:
5508 idxGstRegBase = X86_GREG_xBP;
5509 idxGstRegIndex = X86_GREG_xSI;
5510 break;
5511 case 3:
5512 idxGstRegBase = X86_GREG_xBP;
5513 idxGstRegIndex = X86_GREG_xDI;
5514 break;
5515 case 4:
5516 idxGstRegBase = X86_GREG_xSI;
5517 idxGstRegIndex = UINT8_MAX;
5518 break;
5519 case 5:
5520 idxGstRegBase = X86_GREG_xDI;
5521 idxGstRegIndex = UINT8_MAX;
5522 break;
5523 case 6:
5524 idxGstRegBase = X86_GREG_xBP;
5525 idxGstRegIndex = UINT8_MAX;
5526 break;
5527#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
5528 default:
5529#endif
5530 case 7:
5531 idxGstRegBase = X86_GREG_xBX;
5532 idxGstRegIndex = UINT8_MAX;
5533 break;
5534 }
5535
5536 /*
5537 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
5538 */
5539 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5540 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5541 kIemNativeGstRegUse_ReadOnly);
5542 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
5543 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5544 kIemNativeGstRegUse_ReadOnly)
5545 : UINT8_MAX;
5546#ifdef RT_ARCH_AMD64
5547 if (idxRegIndex == UINT8_MAX)
5548 {
5549 if (u16EffAddr == 0)
5550 {
5551            /* movzx ret, base */
5552 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
5553 }
5554 else
5555 {
5556 /* lea ret32, [base64 + disp32] */
5557 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5558 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5559 if (idxRegRet >= 8 || idxRegBase >= 8)
5560 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5561 pbCodeBuf[off++] = 0x8d;
5562 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5563 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
5564 else
5565 {
5566 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
5567 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5568 }
5569 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5570 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5571 pbCodeBuf[off++] = 0;
5572 pbCodeBuf[off++] = 0;
5573 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5574
5575 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5576 }
5577 }
5578 else
5579 {
5580 /* lea ret32, [index64 + base64 (+ disp32)] */
5581 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5582 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5583 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5584 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5585 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5586 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5587 pbCodeBuf[off++] = 0x8d;
5588 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
5589 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5590 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
5591 if (bMod == X86_MOD_MEM4)
5592 {
5593 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5594 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5595 pbCodeBuf[off++] = 0;
5596 pbCodeBuf[off++] = 0;
5597 }
5598 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5599 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5600 }
5601
5602#elif defined(RT_ARCH_ARM64)
5603 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5604 if (u16EffAddr == 0)
5605 {
5606 if (idxRegIndex == UINT8_MAX)
5607 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
5608 else
5609 {
5610 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
5611 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5612 }
5613 }
5614 else
5615 {
5616 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
5617 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
5618 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
5619 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5620 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
5621 else
5622 {
5623 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
5624 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5625 }
5626 if (idxRegIndex != UINT8_MAX)
5627 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
5628 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5629 }
5630
5631#else
5632# error "port me"
5633#endif
5634
5635 if (idxRegIndex != UINT8_MAX)
5636 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5637 iemNativeRegFreeTmp(pReNative, idxRegBase);
5638 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5639 return off;
5640}
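
/*
 * Worked 16-bit ModR/M example: bRm=0x52 decodes as mod=1, rm=2, i.e.
 * [BP+SI+disp8]; with the disp8 byte 0xf0 the switch above yields
 * u16EffAddr=0xfff0 (-16) and the emitted code computes
 *      GCPtrEff = (uint16_t)(BP + SI - 16)
 * whereas bRm=0x06 (mod=0, rm=6) is the pure disp16 form and is turned into a
 * constant variable without emitting any code at all.
 */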
5641
5642
5643#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
5644 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
5645
5646/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
5647 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
5648DECL_INLINE_THROW(uint32_t)
5649iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5650 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
5651{
5652 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5653
5654 /*
5655 * Handle the disp32 form with no registers first.
5656 *
5657 * Convert to an immediate value, as that'll delay the register allocation
5658 * and assignment till the memory access / call / whatever and we can use
5659 * a more appropriate register (or none at all).
5660 */
5661 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5662 {
5663 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
5664 return off;
5665 }
5666
5667    /* Calculate the fixed displacement (more on this below for the SIB.B=4 and SIB.B=5 cases). */
5668 uint32_t u32EffAddr = 0;
5669 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5670 {
5671 case 0: break;
5672 case 1: u32EffAddr = (int8_t)u32Disp; break;
5673 case 2: u32EffAddr = u32Disp; break;
5674 default: AssertFailed();
5675 }
5676
5677 /* Get the register (or SIB) value. */
5678 uint8_t idxGstRegBase = UINT8_MAX;
5679 uint8_t idxGstRegIndex = UINT8_MAX;
5680 uint8_t cShiftIndex = 0;
5681 switch (bRm & X86_MODRM_RM_MASK)
5682 {
5683 case 0: idxGstRegBase = X86_GREG_xAX; break;
5684 case 1: idxGstRegBase = X86_GREG_xCX; break;
5685 case 2: idxGstRegBase = X86_GREG_xDX; break;
5686 case 3: idxGstRegBase = X86_GREG_xBX; break;
5687 case 4: /* SIB */
5688 {
5689            /* index w/ scaling. */
5690 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
5691 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
5692 {
5693 case 0: idxGstRegIndex = X86_GREG_xAX; break;
5694 case 1: idxGstRegIndex = X86_GREG_xCX; break;
5695 case 2: idxGstRegIndex = X86_GREG_xDX; break;
5696 case 3: idxGstRegIndex = X86_GREG_xBX; break;
5697 case 4: cShiftIndex = 0; /*no index*/ break;
5698 case 5: idxGstRegIndex = X86_GREG_xBP; break;
5699 case 6: idxGstRegIndex = X86_GREG_xSI; break;
5700 case 7: idxGstRegIndex = X86_GREG_xDI; break;
5701 }
5702
5703 /* base */
5704 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
5705 {
5706 case 0: idxGstRegBase = X86_GREG_xAX; break;
5707 case 1: idxGstRegBase = X86_GREG_xCX; break;
5708 case 2: idxGstRegBase = X86_GREG_xDX; break;
5709 case 3: idxGstRegBase = X86_GREG_xBX; break;
5710 case 4:
5711 idxGstRegBase = X86_GREG_xSP;
5712 u32EffAddr += uSibAndRspOffset >> 8;
5713 break;
5714 case 5:
5715 if ((bRm & X86_MODRM_MOD_MASK) != 0)
5716 idxGstRegBase = X86_GREG_xBP;
5717 else
5718 {
5719 Assert(u32EffAddr == 0);
5720 u32EffAddr = u32Disp;
5721 }
5722 break;
5723 case 6: idxGstRegBase = X86_GREG_xSI; break;
5724 case 7: idxGstRegBase = X86_GREG_xDI; break;
5725 }
5726 break;
5727 }
5728 case 5: idxGstRegBase = X86_GREG_xBP; break;
5729 case 6: idxGstRegBase = X86_GREG_xSI; break;
5730 case 7: idxGstRegBase = X86_GREG_xDI; break;
5731 }
5732
5733 /*
5734 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
5735 * the start of the function.
5736 */
5737 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
5738 {
5739 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
5740 return off;
5741 }
5742
5743 /*
5744 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
5745 */
5746 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5747 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
5748 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5749 kIemNativeGstRegUse_ReadOnly);
5750 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
5751 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5752 kIemNativeGstRegUse_ReadOnly);
5753
5754 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
5755 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
5756 {
5757 idxRegBase = idxRegIndex;
5758 idxRegIndex = UINT8_MAX;
5759 }
5760
5761#ifdef RT_ARCH_AMD64
5762 if (idxRegIndex == UINT8_MAX)
5763 {
5764 if (u32EffAddr == 0)
5765 {
5766 /* mov ret, base */
5767 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5768 }
5769 else
5770 {
5771 /* lea ret32, [base64 + disp32] */
5772 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5773 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5774 if (idxRegRet >= 8 || idxRegBase >= 8)
5775 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5776 pbCodeBuf[off++] = 0x8d;
5777 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5778 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5779 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
5780 else
5781 {
5782 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5783 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5784 }
5785 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5786 if (bMod == X86_MOD_MEM4)
5787 {
5788 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5789 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5790 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5791 }
5792 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5793 }
5794 }
5795 else
5796 {
5797 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5798 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5799 if (idxRegBase == UINT8_MAX)
5800 {
5801 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
5802 if (idxRegRet >= 8 || idxRegIndex >= 8)
5803 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5804 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5805 pbCodeBuf[off++] = 0x8d;
5806 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
5807 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
5808 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5809 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5810 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5811 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5812 }
5813 else
5814 {
5815 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
5816 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5817 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5818 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5819 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5820 pbCodeBuf[off++] = 0x8d;
5821 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
5822 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5823 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5824 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
5825 if (bMod != X86_MOD_MEM0)
5826 {
5827 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5828 if (bMod == X86_MOD_MEM4)
5829 {
5830 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5831 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5832 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5833 }
5834 }
5835 }
5836 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5837 }
5838
5839#elif defined(RT_ARCH_ARM64)
5840 if (u32EffAddr == 0)
5841 {
5842 if (idxRegIndex == UINT8_MAX)
5843 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5844 else if (idxRegBase == UINT8_MAX)
5845 {
5846 if (cShiftIndex == 0)
5847 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
5848 else
5849 {
5850 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5851 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
5852 }
5853 }
5854 else
5855 {
5856 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5857 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
5858 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
5859 }
5860 }
5861 else
5862 {
5863 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
5864 {
5865 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5866 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
5867 }
5868 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
5869 {
5870 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5871 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5872 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
5873 }
5874 else
5875 {
5876 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
5877 if (idxRegBase != UINT8_MAX)
5878 {
5879 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5880 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5881 }
5882 }
5883 if (idxRegIndex != UINT8_MAX)
5884 {
5885 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5886 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
5887 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
5888 }
5889 }
5890
5891#else
5892# error "port me"
5893#endif
5894
5895 if (idxRegIndex != UINT8_MAX)
5896 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5897 if (idxRegBase != UINT8_MAX)
5898 iemNativeRegFreeTmp(pReNative, idxRegBase);
5899 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5900 return off;
5901}
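
/*
 * Worked 32-bit SIB example: bRm=0x44, uSibAndRspOffset=0x4c, u32Disp=0x10
 * decodes as mod=1, rm=4 (SIB), SIB scale=1, index=ECX, base=ESP, i.e.
 * [ESP+ECX*2+0x10]. The RSP/ESP offset in bits 8 thru 15 of uSibAndRspOffset
 * (used by the pop [esp] special case) is zero here, so u32EffAddr starts out
 * as 0x10 and the emitted code adds ESP and ECX shifted left by one to it.
 */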
5902
5903
5904#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5905 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5906 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
5907
5908#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5909 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5910 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
5911
5912#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5913 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5914 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
5915
5916/**
5917 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
5918 *
5919 * @returns New code buffer offset.
5920 * @param pReNative The native recompiler state.
5921 * @param off The current code buffer offset.
5922 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
5923 * bit 4 to REX.X. The two bits are part of the
5924 * REG sub-field, which isn't needed in this
5925 * function.
5926 * @param uSibAndRspOffset Two parts:
5927 * - The first 8 bits make up the SIB byte.
5928 * - The next 8 bits are the fixed RSP/ESP offset
5929 * in case of a pop [xSP].
5930 * @param u32Disp The displacement byte/word/dword, if any.
5931 * @param cbInstr The size of the fully decoded instruction. Used
5932 * for RIP relative addressing.
5933 * @param idxVarRet The result variable number.
5934 * @param f64Bit Whether to use a 64-bit or 32-bit address size
5935 * when calculating the address.
5936 *
5937 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
5938 */
5939DECL_INLINE_THROW(uint32_t)
5940iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
5941 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
5942{
5943 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5944
5945 /*
5946 * Special case the rip + disp32 form first.
5947 */
5948 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5949 {
5950#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5951        /* Need to take the current PC offset into account for the displacement; no need to flush here
5952         * as the PC is only read and there are no branches or helper calls involved. */
5953 u32Disp += pReNative->Core.offPc;
5954#endif
5955
5956 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5957 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
5958 kIemNativeGstRegUse_ReadOnly);
5959#ifdef RT_ARCH_AMD64
5960 if (f64Bit)
5961 {
5962 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
5963 if ((int32_t)offFinalDisp == offFinalDisp)
5964 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
5965 else
5966 {
5967 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
5968 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
5969 }
5970 }
5971 else
5972 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
5973
5974#elif defined(RT_ARCH_ARM64)
5975 if (f64Bit)
5976 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
5977 (int64_t)(int32_t)u32Disp + cbInstr);
5978 else
5979 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
5980 (int32_t)u32Disp + cbInstr);
5981
5982#else
5983# error "Port me!"
5984#endif
5985 iemNativeRegFreeTmp(pReNative, idxRegPc);
5986 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5987 return off;
5988 }
5989
5990    /* Calculate the fixed displacement (more on this below for the SIB.B=4 and SIB.B=5 cases). */
5991 int64_t i64EffAddr = 0;
5992 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5993 {
5994 case 0: break;
5995 case 1: i64EffAddr = (int8_t)u32Disp; break;
5996 case 2: i64EffAddr = (int32_t)u32Disp; break;
5997 default: AssertFailed();
5998 }
5999
6000 /* Get the register (or SIB) value. */
6001 uint8_t idxGstRegBase = UINT8_MAX;
6002 uint8_t idxGstRegIndex = UINT8_MAX;
6003 uint8_t cShiftIndex = 0;
6004 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6005 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6006 else /* SIB: */
6007 {
6008        /* index w/ scaling. */
6009 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6010 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6011 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6012 if (idxGstRegIndex == 4)
6013 {
6014 /* no index */
6015 cShiftIndex = 0;
6016 idxGstRegIndex = UINT8_MAX;
6017 }
6018
6019 /* base */
6020 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6021 if (idxGstRegBase == 4)
6022 {
6023 /* pop [rsp] hack */
6024 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6025 }
6026 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6027 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6028 {
6029 /* mod=0 and base=5 -> disp32, no base reg. */
6030 Assert(i64EffAddr == 0);
6031 i64EffAddr = (int32_t)u32Disp;
6032 idxGstRegBase = UINT8_MAX;
6033 }
6034 }
6035
6036 /*
6037 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6038 * the start of the function.
6039 */
6040 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6041 {
6042 if (f64Bit)
6043 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6044 else
6045 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6046 return off;
6047 }
6048
6049 /*
6050 * Now emit code that calculates:
6051 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6052 * or if !f64Bit:
6053 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6054 */
6055 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6056 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6057 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6058 kIemNativeGstRegUse_ReadOnly);
6059 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6060 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6061 kIemNativeGstRegUse_ReadOnly);
6062
6063 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6064 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6065 {
6066 idxRegBase = idxRegIndex;
6067 idxRegIndex = UINT8_MAX;
6068 }
6069
6070#ifdef RT_ARCH_AMD64
6071 uint8_t bFinalAdj;
6072 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6073 bFinalAdj = 0; /* likely */
6074 else
6075 {
6076 /* pop [rsp] with a problematic disp32 value. Split out the
6077 RSP offset and add it separately afterwards (bFinalAdj). */
6078 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6079 Assert(idxGstRegBase == X86_GREG_xSP);
6080 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6081 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6082 Assert(bFinalAdj != 0);
6083 i64EffAddr -= bFinalAdj;
6084 Assert((int32_t)i64EffAddr == i64EffAddr);
6085 }
6086 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6087//pReNative->pInstrBuf[off++] = 0xcc;
6088
6089 if (idxRegIndex == UINT8_MAX)
6090 {
6091 if (u32EffAddr == 0)
6092 {
6093 /* mov ret, base */
6094 if (f64Bit)
6095 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6096 else
6097 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6098 }
6099 else
6100 {
6101 /* lea ret, [base + disp32] */
6102 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6103 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6104 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6105 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6106 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6107 | (f64Bit ? X86_OP_REX_W : 0);
6108 pbCodeBuf[off++] = 0x8d;
6109 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6110 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6111 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6112 else
6113 {
6114 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6115 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6116 }
6117 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6118 if (bMod == X86_MOD_MEM4)
6119 {
6120 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6121 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6122 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6123 }
6124 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6125 }
6126 }
6127 else
6128 {
6129 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6130 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6131 if (idxRegBase == UINT8_MAX)
6132 {
6133 /* lea ret, [(index64 << cShiftIndex) + disp32] */
6134 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
6135 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6136 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6137 | (f64Bit ? X86_OP_REX_W : 0);
6138 pbCodeBuf[off++] = 0x8d;
6139 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6140 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6141 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6142 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6143 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6144 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6145 }
6146 else
6147 {
6148 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6149 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6150 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6151 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6152 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6153 | (f64Bit ? X86_OP_REX_W : 0);
6154 pbCodeBuf[off++] = 0x8d;
6155 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6156 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6157 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6158 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6159 if (bMod != X86_MOD_MEM0)
6160 {
6161 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6162 if (bMod == X86_MOD_MEM4)
6163 {
6164 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6165 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6166 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6167 }
6168 }
6169 }
6170 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6171 }
6172
6173 if (!bFinalAdj)
6174 { /* likely */ }
6175 else
6176 {
6177 Assert(f64Bit);
6178 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
6179 }
6180
6181#elif defined(RT_ARCH_ARM64)
6182 if (i64EffAddr == 0)
6183 {
6184 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6185 if (idxRegIndex == UINT8_MAX)
6186 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
6187 else if (idxRegBase != UINT8_MAX)
6188 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6189 f64Bit, false /*fSetFlags*/, cShiftIndex);
6190 else
6191 {
6192 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
6193 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
6194 }
6195 }
6196 else
6197 {
6198 if (f64Bit)
6199 { /* likely */ }
6200 else
6201 i64EffAddr = (int32_t)i64EffAddr;
6202
6203 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
6204 {
6205 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6206 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
6207 }
6208 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
6209 {
6210 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6211 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
6212 }
6213 else
6214 {
6215 if (f64Bit)
6216 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
6217 else
6218 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
6219 if (idxRegBase != UINT8_MAX)
6220 {
6221 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6222 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
6223 }
6224 }
6225 if (idxRegIndex != UINT8_MAX)
6226 {
6227 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6228 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6229 f64Bit, false /*fSetFlags*/, cShiftIndex);
6230 }
6231 }
6232
6233#else
6234# error "port me"
6235#endif
6236
6237 if (idxRegIndex != UINT8_MAX)
6238 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6239 if (idxRegBase != UINT8_MAX)
6240 iemNativeRegFreeTmp(pReNative, idxRegBase);
6241 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6242 return off;
6243}
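
/*
 * Worked example of the bRmEx packing for the 64-bit variant: a [r13+disp8]
 * operand arrives as mod=1, rm=5 with REX.B set, i.e. bRmEx=0x4d (bit 3 =
 * REX.B), so the code above picks idxGstRegBase=13 (r13) and, with u32Disp=8,
 * i64EffAddr=8. REX.X in bit 4 extends the SIB index field the same way,
 * giving access to r8 thru r15 as index registers.
 */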
6244
6245
6246/*********************************************************************************************************************************
6247* Memory fetches and stores common *
6248*********************************************************************************************************************************/
6249
6250typedef enum IEMNATIVEMITMEMOP
6251{
6252 kIemNativeEmitMemOp_Store = 0,
6253 kIemNativeEmitMemOp_Fetch,
6254 kIemNativeEmitMemOp_Fetch_Zx_U16,
6255 kIemNativeEmitMemOp_Fetch_Zx_U32,
6256 kIemNativeEmitMemOp_Fetch_Zx_U64,
6257 kIemNativeEmitMemOp_Fetch_Sx_U16,
6258 kIemNativeEmitMemOp_Fetch_Sx_U32,
6259 kIemNativeEmitMemOp_Fetch_Sx_U64
6260} IEMNATIVEMITMEMOP;
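
/*
 * Example of how the enum maps onto the MC statements handled below: a byte
 * fetch that is sign extended into a 32-bit destination uses cbMem=1 with
 * kIemNativeEmitMemOp_Fetch_Sx_U32 (helper iemNativeHlpMemFetchDataU8_Sx_U32
 * in the segmented case), while a plain 32-bit store uses cbMem=4 with
 * kIemNativeEmitMemOp_Store.
 */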
6261
6262/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
6263 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
6264 * (with iSegReg = UINT8_MAX). */
6265DECL_INLINE_THROW(uint32_t)
6266iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
6267 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAlignMaskAndCtl, IEMNATIVEMITMEMOP enmOp,
6268 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
6269{
6270 /*
6271 * Assert sanity.
6272 */
6273 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
6274 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
6275 Assert( enmOp != kIemNativeEmitMemOp_Store
6276 || pVarValue->enmKind == kIemNativeVarKind_Immediate
6277 || pVarValue->enmKind == kIemNativeVarKind_Stack);
6278 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
6279 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
6280 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
6281 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
6282 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6283 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
6284#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6285 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
6286 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
6287#else
6288 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
6289#endif
6290 Assert(!(fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
6291 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6292#ifdef VBOX_STRICT
6293 if (iSegReg == UINT8_MAX)
6294 {
6295 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6296 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6297 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6298 switch (cbMem)
6299 {
6300 case 1:
6301 Assert( pfnFunction
6302 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
6303 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6304 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6305 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6306 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6307 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
6308 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
6309 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
6310 : UINT64_C(0xc000b000a0009000) ));
6311 Assert(!fAlignMaskAndCtl);
6312 break;
6313 case 2:
6314 Assert( pfnFunction
6315 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
6316 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6317 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6318 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6319 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
6320 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
6321 : UINT64_C(0xc000b000a0009000) ));
6322 Assert(fAlignMaskAndCtl <= 1);
6323 break;
6324 case 4:
6325 Assert( pfnFunction
6326 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
6327 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6328 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6329 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
6330 : UINT64_C(0xc000b000a0009000) ));
6331 Assert(fAlignMaskAndCtl <= 3);
6332 break;
6333 case 8:
6334 Assert( pfnFunction
6335 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
6336 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
6337 : UINT64_C(0xc000b000a0009000) ));
6338 Assert(fAlignMaskAndCtl <= 7);
6339 break;
6340#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6341 case sizeof(RTUINT128U):
6342 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6343 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
6344 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6345 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
6346 || ( enmOp == kIemNativeEmitMemOp_Store
6347 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6348 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
6349 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6350 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6351 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
6352 : fAlignMaskAndCtl <= 15);
6353 break;
6354 case sizeof(RTUINT256U):
6355 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6356 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
6357 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
6358 || ( enmOp == kIemNativeEmitMemOp_Store
6359 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
6360 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
6361 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
6362 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
6363 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
6364 : fAlignMaskAndCtl <= 31);
6365 break;
6366#endif
6367 }
6368 }
6369 else
6370 {
6371 Assert(iSegReg < 6);
6372 switch (cbMem)
6373 {
6374 case 1:
6375 Assert( pfnFunction
6376 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
6377 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
6378 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6379 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6380 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6381 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
6382 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
6383 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
6384 : UINT64_C(0xc000b000a0009000) ));
6385 Assert(!fAlignMaskAndCtl);
6386 break;
6387 case 2:
6388 Assert( pfnFunction
6389 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
6390 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
6391 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6392 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6393 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
6394 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
6395 : UINT64_C(0xc000b000a0009000) ));
6396 Assert(fAlignMaskAndCtl <= 1);
6397 break;
6398 case 4:
6399 Assert( pfnFunction
6400 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
6401 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
6402 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
6403 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
6404 : UINT64_C(0xc000b000a0009000) ));
6405 Assert(fAlignMaskAndCtl <= 3);
6406 break;
6407 case 8:
6408 Assert( pfnFunction
6409 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
6410 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
6411 : UINT64_C(0xc000b000a0009000) ));
6412 Assert(fAlignMaskAndCtl <= 7);
6413 break;
6414#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6415 case sizeof(RTUINT128U):
6416 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6417 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
6418 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6419 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
6420 || ( enmOp == kIemNativeEmitMemOp_Store
6421 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6422 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
6423 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6424 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6425 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
6426 : fAlignMaskAndCtl <= 15);
6427 break;
6428 case sizeof(RTUINT256U):
6429 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6430 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
6431 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
6432 || ( enmOp == kIemNativeEmitMemOp_Store
6433 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
6434 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
6435 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
6436 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
6437 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
6438 : fAlignMaskAndCtl <= 31);
6439 break;
6440#endif
6441 }
6442 }
6443#endif
6444
6445#ifdef VBOX_STRICT
6446 /*
6447 * Check that the fExec flags we've got make sense.
6448 */
6449 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6450#endif
6451
6452 /*
6453 * To keep things simple we have to commit any pending writes first as we
6454 * may end up making calls.
6455 */
6456 /** @todo we could postpone this till we make the call and reload the
6457 * registers after returning from the call. Not sure if that's sensible or
6458 * not, though. */
6459#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6460 off = iemNativeRegFlushPendingWrites(pReNative, off);
6461#else
6462 /* The program counter is treated differently for now. */
6463 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
6464#endif
6465
6466#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6467 /*
6468 * Move/spill/flush stuff out of call-volatile registers.
6469 * This is the easy way out. We could contain this to the tlb-miss branch
6470 * by saving and restoring active stuff here.
6471 */
6472 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
6473#endif
6474
6475 /*
6476 * Define labels and allocate the result register (trying for the return
6477 * register if we can).
6478 */
6479 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6480#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6481 uint8_t idxRegValueFetch = UINT8_MAX;
6482
6483 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6484 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6485 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
6486 else
6487 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6488 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6489 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6490 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6491#else
6492 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6493 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6494 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6495 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6496#endif
6497 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
6498
6499#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6500 uint8_t idxRegValueStore = UINT8_MAX;
6501
6502 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6503 idxRegValueStore = !TlbState.fSkip
6504 && enmOp == kIemNativeEmitMemOp_Store
6505 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6506 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6507 : UINT8_MAX;
6508 else
6509 idxRegValueStore = !TlbState.fSkip
6510 && enmOp == kIemNativeEmitMemOp_Store
6511 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6512 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6513 : UINT8_MAX;
6514
6515#else
6516 uint8_t const idxRegValueStore = !TlbState.fSkip
6517 && enmOp == kIemNativeEmitMemOp_Store
6518 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6519 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6520 : UINT8_MAX;
6521#endif
6522    uint8_t  const idxRegMemResult   = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
6523 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6524 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6525 : UINT32_MAX;
6526
6527 /*
6528 * Jump to the TLB lookup code.
6529 */
6530 if (!TlbState.fSkip)
6531 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6532
6533 /*
6534 * TlbMiss:
6535 *
6536     * Call helper to do the fetching or storing.
6537 * We flush all guest register shadow copies here.
6538 */
6539 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
6540
6541#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6542 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6543#else
6544 RT_NOREF(idxInstr);
6545#endif
6546
6547#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6548 if (pReNative->Core.offPc)
6549 {
6550 /*
6551 * Update the program counter but restore it at the end of the TlbMiss branch.
6552 * This should allow delaying more program counter updates for the TlbLookup and hit paths
6553         * which are hopefully much more frequent, reducing the number of memory accesses.
6554 */
6555 /* Allocate a temporary PC register. */
6556 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6557
6558 /* Perform the addition and store the result. */
6559 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6560 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6561
6562 /* Free and flush the PC register. */
6563 iemNativeRegFreeTmp(pReNative, idxPcReg);
6564 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6565 }
6566#endif
6567
6568#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6569 /* Save variables in volatile registers. */
6570 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6571 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
6572 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
6573 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6574#endif
6575
6576 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
6577 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
6578#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6579 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6580 {
6581 /*
6582 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
6583 *
6584         * @note A host SIMD register was assigned to the variable for the TlbLookup case above;
6585         *       it must not be freed here, or the value will not be synced back into that register
6586         *       further down the road because the variable would no longer know it has a register assigned.
6587 *
6588 * @note For loads it is not required to sync what is in the assigned register with the stack slot
6589 * as it will be overwritten anyway.
6590 */
6591 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6592 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
6593 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
6594 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6595 }
6596 else
6597#endif
6598 if (enmOp == kIemNativeEmitMemOp_Store)
6599 {
6600 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6601 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
6602#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6603 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6604#else
6605 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
6606 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6607#endif
6608 }
6609
6610 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
6611 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
6612#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6613 fVolGregMask);
6614#else
6615 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
6616#endif
6617
6618 if (iSegReg != UINT8_MAX)
6619 {
6620 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
6621 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
6622 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
6623 }
6624
6625 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6626 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6627
6628 /* Done setting up parameters, make the call. */
6629 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6630
6631 /*
6632 * Put the result in the right register if this is a fetch.
6633 */
6634 if (enmOp != kIemNativeEmitMemOp_Store)
6635 {
6636#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6637 if ( cbMem == sizeof(RTUINT128U)
6638 || cbMem == sizeof(RTUINT256U))
6639 {
6640 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
6641
6642 /* Sync the value on the stack with the host register assigned to the variable. */
6643 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
6644 }
6645 else
6646#endif
6647 {
6648 Assert(idxRegValueFetch == pVarValue->idxReg);
6649 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
6650 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
6651 }
6652 }
6653
6654#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6655 /* Restore variables and guest shadow registers to volatile registers. */
6656 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6657 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6658#endif
6659
6660#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6661 if (pReNative->Core.offPc)
6662 {
6663 /*
6664 * Time to restore the program counter to its original value.
6665 */
6666 /* Allocate a temporary PC register. */
6667 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6668 kIemNativeGstRegUse_ForUpdate);
6669
6670 /* Restore the original value. */
6671 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6672 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6673
6674 /* Free and flush the PC register. */
6675 iemNativeRegFreeTmp(pReNative, idxPcReg);
6676 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6677 }
6678#endif
6679
6680#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6681 if (!TlbState.fSkip)
6682 {
6683 /* end of TlbMiss - Jump to the done label. */
6684 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6685 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6686
6687 /*
6688 * TlbLookup:
6689 */
6690 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl,
6691 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
6692 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
6693
6694 /*
6695 * Emit code to do the actual storing / fetching.
6696 */
6697 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
6698# ifdef IEM_WITH_TLB_STATISTICS
6699 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6700 enmOp == kIemNativeEmitMemOp_Store
6701                                              ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
6702                                              : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
6703# endif
6704 switch (enmOp)
6705 {
6706 case kIemNativeEmitMemOp_Store:
6707 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
6708 {
6709 switch (cbMem)
6710 {
6711 case 1:
6712 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6713 break;
6714 case 2:
6715 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6716 break;
6717 case 4:
6718 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6719 break;
6720 case 8:
6721 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6722 break;
6723#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6724 case sizeof(RTUINT128U):
6725 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6726 break;
6727 case sizeof(RTUINT256U):
6728 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6729 break;
6730#endif
6731 default:
6732 AssertFailed();
6733 }
6734 }
6735 else
6736 {
6737 switch (cbMem)
6738 {
6739 case 1:
6740 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
6741 idxRegMemResult, TlbState.idxReg1);
6742 break;
6743 case 2:
6744 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
6745 idxRegMemResult, TlbState.idxReg1);
6746 break;
6747 case 4:
6748 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
6749 idxRegMemResult, TlbState.idxReg1);
6750 break;
6751 case 8:
6752 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
6753 idxRegMemResult, TlbState.idxReg1);
6754 break;
6755 default:
6756 AssertFailed();
6757 }
6758 }
6759 break;
6760
6761 case kIemNativeEmitMemOp_Fetch:
6762 case kIemNativeEmitMemOp_Fetch_Zx_U16:
6763 case kIemNativeEmitMemOp_Fetch_Zx_U32:
6764 case kIemNativeEmitMemOp_Fetch_Zx_U64:
6765 switch (cbMem)
6766 {
6767 case 1:
6768 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6769 break;
6770 case 2:
6771 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6772 break;
6773 case 4:
6774 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6775 break;
6776 case 8:
6777 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6778 break;
6779#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6780 case sizeof(RTUINT128U):
6781 /*
6782 * No need to sync back the register with the stack, this is done by the generic variable handling
6783 * code if there is a register assigned to a variable and the stack must be accessed.
6784 */
6785 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6786 break;
6787 case sizeof(RTUINT256U):
6788 /*
6789 * No need to sync back the register with the stack, this is done by the generic variable handling
6790 * code if there is a register assigned to a variable and the stack must be accessed.
6791 */
6792 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6793 break;
6794#endif
6795 default:
6796 AssertFailed();
6797 }
6798 break;
6799
6800 case kIemNativeEmitMemOp_Fetch_Sx_U16:
6801 Assert(cbMem == 1);
6802 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6803 break;
6804
6805 case kIemNativeEmitMemOp_Fetch_Sx_U32:
6806 Assert(cbMem == 1 || cbMem == 2);
6807 if (cbMem == 1)
6808 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6809 else
6810 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6811 break;
6812
6813 case kIemNativeEmitMemOp_Fetch_Sx_U64:
6814 switch (cbMem)
6815 {
6816 case 1:
6817 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6818 break;
6819 case 2:
6820 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6821 break;
6822 case 4:
6823 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6824 break;
6825 default:
6826 AssertFailed();
6827 }
6828 break;
6829
6830 default:
6831 AssertFailed();
6832 }
6833
6834 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6835
6836 /*
6837 * TlbDone:
6838 */
6839 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6840
6841 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
6842
6843# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6844 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
6845 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6846# endif
6847 }
6848#else
6849 RT_NOREF(fAlignMaskAndCtl, idxLabelTlbMiss);
6850#endif
6851
6852 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
6853 iemNativeVarRegisterRelease(pReNative, idxVarValue);
6854 return off;
6855}
6856
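/*
 * Rough shape of the native code the worker above emits for a single fetch/store
 * (a simplified sketch only - the real TLB probe, alignment checks and register
 * choices come from iemNativeEmitTlbLookup and the register allocator, not shown here):
 *
 *          jmp     TlbLookup_N
 *      TlbMiss_N:
 *          ; spill/save volatile registers, optionally write back the delayed PC
 *          ; arg0 = pVCpu, arg1 = GCPtrMem (+ offDisp), arg2/arg3 = value and/or iSegReg
 *          call    pfnFunction
 *          ; fetches: move the helper's return value into the variable's host register
 *          jmp     TlbDone_N
 *      TlbLookup_N:
 *          ; inline data TLB probe, branching back to TlbMiss_N on a miss
 *          ; on a hit: direct load/store through the translated host address
 *      TlbDone_N:
 */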
6857
6858
6859/*********************************************************************************************************************************
6860* Memory fetches (IEM_MEM_FETCH_XXX). *
6861*********************************************************************************************************************************/
6862
6863/* 8-bit segmented: */
6864#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
6865 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
6866 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
6867 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6868
6869#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6870 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6871 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
6872 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6873
6874#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6875 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6876 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6877 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6878
6879#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6880 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6881 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6882 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6883
6884#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6885 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6886 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
6887 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
6888
6889#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6890 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6891 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6892 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
6893
6894#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6895 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6896 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6897 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
6898
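/*
 * Usage sketch (variable names invented for illustration): in the recompiled IEM_MC blocks
 * these wrappers appear as e.g.
 *
 *      IEM_MC_LOCAL(uint8_t, u8Tmp);
 *      IEM_MC_FETCH_MEM_U8(u8Tmp, iEffSeg, GCPtrEffSrc);
 *
 * and expand into a call to iemNativeEmitMemFetchStoreDataCommon with cbMem set to
 * sizeof(uint8_t) and the matching iemNativeHlpMemFetchDataU8* helper as TLB-miss fallback.
 */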
6899/* 16-bit segmented: */
6900#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6901 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6902 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6903 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6904
6905#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6906 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6907 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6908 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
6909
6910#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6911 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6912 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6913 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6914
6915#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6916 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6917 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6918 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6919
6920#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6921 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6922 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6923 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6924
6925#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6926 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6927 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6928 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
6929
6930
6931/* 32-bit segmented: */
6932#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6933 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6934 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6935 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6936
6937#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6938 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6939 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6940 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
6941
6942#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6943 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6944 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6945 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6946
6947#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6948 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6949 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6950 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
6951
6952#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
6953 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
6954 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6955 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6956
6957#define IEM_MC_FETCH_MEM_I16_DISP(a_i16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6958 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
6959 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6960 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
6961
6962#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
6963 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
6964 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6965 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6966
6967#define IEM_MC_FETCH_MEM_I32_DISP(a_i32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6968 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
6969 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6970 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
6971
6972#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
6973 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
6974 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6975 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6976
6977AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
6978#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
6979 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
6980 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
6981 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6982
6983
6984/* 64-bit segmented: */
6985#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6986 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6987 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6988 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6989
6990AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
6991#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
6992 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
6993 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
6994 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6995
6996
6997/* 8-bit flat: */
6998#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
6999 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
7000 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7001 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7002
7003#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7004 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7005 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7006 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7007
7008#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7009 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7010 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7011 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7012
7013#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7014 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7015 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7016 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7017
7018#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7019 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7020 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7021 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7022
7023#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7024 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7025 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7026 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7027
7028#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7029 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7030 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7031 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7032
7033
7034/* 16-bit flat: */
7035#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7036 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7037 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7038 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7039
7040#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7041 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7042 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7043 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7044
7045#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7046 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7047 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7048 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7049
7050#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7051 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7052 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7053 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7054
7055#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7056 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7057 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7058 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7059
7060#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7061 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7062 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7063 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7064
7065/* 32-bit flat: */
7066#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7067 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7068 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7069 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7070
7071#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7072 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7073 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7074 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7075
7076#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7077 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7078 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7079 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7080
7081#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7082 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7083 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7084 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7085
7086#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7087 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7088 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7089 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7090
7091#define IEM_MC_FETCH_MEM_FLAT_I16_DISP(a_i16Dst, a_GCPtrMem, a_offDisp) \
7092 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7093 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7094 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7095
7096#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7097 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7098 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7099 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7100
7101#define IEM_MC_FETCH_MEM_FLAT_I32_DISP(a_i32Dst, a_GCPtrMem, a_offDisp) \
7102 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7103 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7104 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7105
7106#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7107 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
7108 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7109 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7110
7111#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7112 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
7113 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7114 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7115
7116
7117/* 64-bit flat: */
7118#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7119 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7120 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7121 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7122
7123#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7124 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
7125 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7126 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7127
7128#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7129/* 128-bit segmented: */
7130#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7131 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7132 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7133 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7134
7135#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7136 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7137 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7138 kIemNativeEmitMemOp_Fetch, \
7139 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7140
7141AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7142#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7143 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, sizeof(X86XMMREG), \
7144 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7145 kIemNativeEmitMemOp_Fetch, \
7146 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7147
7148#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7149 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7150 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7151 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7152
7153#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7154 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7155 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7156 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7157
7158
7159/* 128-bit flat: */
7160#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7161 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7162 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7163 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7164
7165#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7166 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7167 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7168 kIemNativeEmitMemOp_Fetch, \
7169 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7170
7171#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7172 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, sizeof(X86XMMREG), \
7173 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7174 kIemNativeEmitMemOp_Fetch, \
7175 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7176
7177#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7178 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7179 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7180 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7181
7182#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
7183 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
7184 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7185 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7186
7187/* 256-bit segmented: */
7188#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7189 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7190 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7191 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7192
7193#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7194 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7195 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7196 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7197
7198#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7199 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7200 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7201 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7202
7203#define IEM_MC_FETCH_MEM_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7204 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7205 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7206 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7207
7208
7209/* 256-bit flat: */
7210#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
7211 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7212 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7213 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7214
7215#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
7216 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7217 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7218 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7219
7220#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
7221 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
7222 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7223 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7224
7225#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
7226 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, \
7227 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7228 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7229
7230#endif
7231
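/*
 * A note on the fAlignMaskAndCtl arguments used in this section, as asserted by the common
 * worker: the low byte is the address mask that must test zero for the access to count as
 * aligned, while IEM_MEMMAP_F_ALIGN_GP and IEM_MEMMAP_F_ALIGN_SSE adjust how a violation is
 * raised (broadly: the SSE/AVX style #GP(0) rather than the default #AC style check).  For
 * example, sizeof(uint32_t) - 1 requests a plain 4-byte alignment check, whereas
 * (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE requests the
 * strict 16-byte alignment of the MOVDQA style accesses.
 */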
7232
7233/*********************************************************************************************************************************
7234* Memory stores (IEM_MEM_STORE_XXX). *
7235*********************************************************************************************************************************/
7236
7237#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
7238 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
7239 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
7240 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7241
7242#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
7243 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
7244 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7245 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7246
7247#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
7248 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
7249 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7250 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7251
7252#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
7253 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
7254 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7255 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7256
7257
7258#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
7259 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
7260 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
7261 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7262
7263#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
7264 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
7265 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7266 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7267
7268#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
7269 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
7270 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7271 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7272
7273#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
7274 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
7275 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7276 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7277
7278
7279#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
7280 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7281 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7282
7283#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
7284 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7285 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7286
7287#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
7288 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7289 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7290
7291#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
7292 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7293 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7294
7295
7296#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
7297 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7298 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7299
7300#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
7301 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7302 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7303
7304#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
7305 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7306 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7307
7308#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
7309 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7310 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7311
7312/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
7313 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
7314DECL_INLINE_THROW(uint32_t)
7315iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
7316 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
7317{
7318 /*
7319 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
7320 * to do the grunt work.
7321 */
7322 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
7323 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
7324 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
7325 pfnFunction, idxInstr);
7326 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
7327 return off;
7328}
7329
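/*
 * Illustration (hypothetical operands): IEM_MC_STORE_MEM_U16_CONST(X86_SREG_ES, GCPtrDst, 0xffff)
 * allocates a temporary 16-bit immediate variable holding 0xffff, so the TLB-hit path in the
 * common worker can use iemNativeEmitStoreImm16ByGprEx directly, while the TLB-miss path feeds
 * the immediate straight into iemNativeHlpMemStoreDataU16.
 */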
7330
7331#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7332# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
7333 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7334 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7335 kIemNativeEmitMemOp_Store, \
7336 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
7337
7338# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
7339 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
7340 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7341 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
7342
7343# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
7344 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
7345 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7346 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
7347
7348# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
7349 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7350 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
7351 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7352
7353
7354# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
7355 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7356 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7357 kIemNativeEmitMemOp_Store, \
7358 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
7359
7360# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
7361 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
7362 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7363 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
7364
7365# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
7366 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
7367 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7368 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
7369
7370# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
7371 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
7372 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
7373 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7374#endif
7375
7376
7377
7378/*********************************************************************************************************************************
7379* Stack Accesses. *
7380*********************************************************************************************************************************/
7381/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
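/* Decoding sketch, matching the RT_BYTE1/2/3 unpacking in iemNativeEmitStackPush below:
   RT_MAKE_U32_FROM_U8(16, 64, 0, 0) is a 16-bit (operand-size prefixed) push in 64-bit mode,
   i.e. cbMem = 16 / 8 = 2, cBitsFlat = 64, fIsSegReg = false, while
   RT_MAKE_U32_FROM_U8(32, 0, 1, 0) is a 32-bit segment register push in a non-flat mode. */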
7382#define IEM_MC_PUSH_U16(a_u16Value) \
7383 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7384 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
7385#define IEM_MC_PUSH_U32(a_u32Value) \
7386 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7387 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
7388#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
7389 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
7390 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
7391#define IEM_MC_PUSH_U64(a_u64Value) \
7392 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7393 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
7394
7395#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
7396 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7397 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7398#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
7399 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7400 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
7401#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
7402 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
7403 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
7404
7405#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
7406 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7407 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7408#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
7409 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7410 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
7411
7412
7413/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
7414DECL_INLINE_THROW(uint32_t)
7415iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
7416 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7417{
7418 /*
7419 * Assert sanity.
7420 */
7421 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7422 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7423#ifdef VBOX_STRICT
7424 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7425 {
7426 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7427 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7428 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7429 Assert( pfnFunction
7430 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7431 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
7432 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
7433 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7434 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
7435 : UINT64_C(0xc000b000a0009000) ));
7436 }
7437 else
7438 Assert( pfnFunction
7439 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
7440 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
7441 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
7442 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
7443 : UINT64_C(0xc000b000a0009000) ));
7444#endif
7445
7446#ifdef VBOX_STRICT
7447 /*
7448 * Check that the fExec flags we've got make sense.
7449 */
7450 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7451#endif
7452
7453 /*
7454 * To keep things simple we have to commit any pending writes first as we
7455 * may end up making calls.
7456 */
7457 /** @todo we could postpone this till we make the call and reload the
7458 * registers after returning from the call. Not sure if that's sensible or
7459 * not, though. */
7460 off = iemNativeRegFlushPendingWrites(pReNative, off);
7461
7462 /*
7463 * First we calculate the new RSP and the effective stack pointer value.
7464 * For 64-bit mode and flat 32-bit these two are the same.
7465 * (Code structure is very similar to that of PUSH)
7466 */
7467 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7468 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
7469 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
7470 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
7471 ? cbMem : sizeof(uint16_t);
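    /* Note: on Intel CPUs a segment register push outside 16-bit code only writes the
       low 16 bits of the stack slot, hence the narrower access size chosen above
       (see also the TLB-hit store code further down). */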
7472 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7473 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7474 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7475 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7476 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
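    /* For segmented stacks we test SS.ATTR.D below, emit the SP update matching the
       current CPU mode inline, and branch to the opposite-width variant emitted out of
       line at Use16BitSp; offFixupJumpToUseOtherBitSp records the branch so it can be
       fixed up once that code has been emitted. */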
7477 if (cBitsFlat != 0)
7478 {
7479 Assert(idxRegEffSp == idxRegRsp);
7480 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7481 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7482 if (cBitsFlat == 64)
7483 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
7484 else
7485 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
7486 }
7487 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7488 {
7489 Assert(idxRegEffSp != idxRegRsp);
7490 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7491 kIemNativeGstRegUse_ReadOnly);
7492#ifdef RT_ARCH_AMD64
7493 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7494#else
7495 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7496#endif
7497 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7498 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7499 offFixupJumpToUseOtherBitSp = off;
7500 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7501 {
7502 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7503 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7504 }
7505 else
7506 {
7507 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7508 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7509 }
7510 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7511 }
7512 /* SpUpdateEnd: */
7513 uint32_t const offLabelSpUpdateEnd = off;
7514
7515 /*
 7516     * Okay, now prepare for the TLB lookup and jump to its code (or to TlbMiss if
 7517     * we're skipping the lookup).
7518 */
7519 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7520 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
7521 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7522 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7523 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7524 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7525 : UINT32_MAX;
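    /* Each TLB lookup gets its own sequence number so the TlbLookup/TlbMiss/TlbDone
       labels of different lookups in the same translation block remain distinct. */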
7526 uint8_t const idxRegValue = !TlbState.fSkip
7527 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7528 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
7529 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
7530 : UINT8_MAX;
7531 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
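    /* On the TLB-hit path idxRegMemResult receives the host address to store to; it is
       not needed when the lookup is being skipped. */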
7532
7533
7534 if (!TlbState.fSkip)
7535 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7536 else
7537 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
7538
7539 /*
7540 * Use16BitSp:
7541 */
7542 if (cBitsFlat == 0)
7543 {
7544#ifdef RT_ARCH_AMD64
7545 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7546#else
7547 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7548#endif
7549 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7550 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7551 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7552 else
7553 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7554 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7555 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7556 }
7557
7558 /*
7559 * TlbMiss:
7560 *
7561 * Call helper to do the pushing.
7562 */
7563 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7564
7565#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7566 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7567#else
7568 RT_NOREF(idxInstr);
7569#endif
7570
7571 /* Save variables in volatile registers. */
7572 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7573 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7574 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
7575 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
7576 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7577
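    /* Load the helper arguments, taking care not to clobber a source that still lives
       in one of the argument registers. */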
7578 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
7579 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
7580 {
7581 /* Swap them using ARG0 as temp register: */
7582 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
7583 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
7584 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
7585 }
7586 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
7587 {
7588 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
7589 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
7590 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7591
7592 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
7593 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7594 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7595 }
7596 else
7597 {
7598 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
7599 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7600
7601 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
7602 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
7603 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
7604 }
7605
7606 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7607 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7608
7609 /* Done setting up parameters, make the call. */
7610 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7611
7612 /* Restore variables and guest shadow registers to volatile registers. */
7613 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7614 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7615
7616#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7617 if (!TlbState.fSkip)
7618 {
7619 /* end of TlbMiss - Jump to the done label. */
7620 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7621 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7622
7623 /*
7624 * TlbLookup:
7625 */
7626 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
7627 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7628
7629 /*
7630 * Emit code to do the actual storing / fetching.
7631 */
7632 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7633# ifdef IEM_WITH_TLB_STATISTICS
7634 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7635 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
7636# endif
7637 if (idxRegValue != UINT8_MAX)
7638 {
7639 switch (cbMemAccess)
7640 {
7641 case 2:
7642 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7643 break;
7644 case 4:
7645 if (!fIsIntelSeg)
7646 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7647 else
7648 {
 7649                     /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
 7650                        PUSH FS in real mode, so we have to try to emulate that here.
7651 We borrow the now unused idxReg1 from the TLB lookup code here. */
7652 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
7653 kIemNativeGstReg_EFlags);
7654 if (idxRegEfl != UINT8_MAX)
7655 {
 7656#ifdef RT_ARCH_AMD64
7657 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
7658 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7659 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7660#else
7661 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
7662 off, TlbState.idxReg1, idxRegEfl,
7663 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7664#endif
7665 iemNativeRegFreeTmp(pReNative, idxRegEfl);
7666 }
7667 else
7668 {
7669 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
7670 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
7671 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7672 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7673 }
7674 /* ASSUMES the upper half of idxRegValue is ZERO. */
7675 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
7676 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
7677 }
7678 break;
7679 case 8:
7680 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7681 break;
7682 default:
7683 AssertFailed();
7684 }
7685 }
7686 else
7687 {
7688 switch (cbMemAccess)
7689 {
7690 case 2:
7691 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7692 idxRegMemResult, TlbState.idxReg1);
7693 break;
7694 case 4:
7695 Assert(!fIsSegReg);
7696 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7697 idxRegMemResult, TlbState.idxReg1);
7698 break;
7699 case 8:
7700 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
7701 break;
7702 default:
7703 AssertFailed();
7704 }
7705 }
7706
7707 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7708 TlbState.freeRegsAndReleaseVars(pReNative);
7709
7710 /*
7711 * TlbDone:
7712 *
7713 * Commit the new RSP value.
7714 */
7715 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7716 }
7717#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
7718
7719#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7720 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
7721#endif
7722 iemNativeRegFreeTmp(pReNative, idxRegRsp);
7723 if (idxRegEffSp != idxRegRsp)
7724 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
7725
 7726    /* The value variable is implicitly flushed. */
7727 if (idxRegValue != UINT8_MAX)
7728 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7729 iemNativeVarFreeLocal(pReNative, idxVarValue);
7730
7731 return off;
7732}
7733
7734
7735
7736/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
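/* Same encoding as for the push wrappers above, minus the segment register flag. */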
7737#define IEM_MC_POP_GREG_U16(a_iGReg) \
7738 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7739 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
7740#define IEM_MC_POP_GREG_U32(a_iGReg) \
7741 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7742 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
7743#define IEM_MC_POP_GREG_U64(a_iGReg) \
7744 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7745 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
7746
7747#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
7748 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7749 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7750#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
7751 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7752 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
7753
7754#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
7755 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7756 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7757#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
7758 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7759 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
7760
7761
7762DECL_FORCE_INLINE_THROW(uint32_t)
7763iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
7764 uint8_t idxRegTmp)
7765{
7766 /* Use16BitSp: */
7767#ifdef RT_ARCH_AMD64
7768 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7769 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
7770 RT_NOREF(idxRegTmp);
7771#else
7772 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
7773 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
7774 /* add tmp, regrsp, #cbMem */
7775 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
7776 /* and tmp, tmp, #0xffff */
7777 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
7778 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
 7779    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
7780 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
7781#endif
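    /* Worked example (values picked purely for illustration): with RSP=0x000012340000fffe
       and cbMem=2, the value will be read at EffSp=0xfffe and SP wraps to 0x0000, while
       RSP bits 63:16 stay untouched. */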
7782 return off;
7783}
7784
7785
7786DECL_FORCE_INLINE(uint32_t)
7787iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
7788{
7789 /* Use32BitSp: */
7790 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7791 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
7792 return off;
7793}
7794
7795
7796/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
7797DECL_INLINE_THROW(uint32_t)
7798iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
7799 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7800{
7801 /*
7802 * Assert sanity.
7803 */
7804 Assert(idxGReg < 16);
7805#ifdef VBOX_STRICT
7806 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7807 {
7808 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7809 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7810 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7811 Assert( pfnFunction
7812 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7813 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
7814 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7815 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
7816 : UINT64_C(0xc000b000a0009000) ));
7817 }
7818 else
7819 Assert( pfnFunction
7820 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
7821 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
7822 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
7823 : UINT64_C(0xc000b000a0009000) ));
7824#endif
7825
7826#ifdef VBOX_STRICT
7827 /*
7828 * Check that the fExec flags we've got make sense.
7829 */
7830 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7831#endif
7832
7833 /*
7834 * To keep things simple we have to commit any pending writes first as we
7835 * may end up making calls.
7836 */
7837 off = iemNativeRegFlushPendingWrites(pReNative, off);
7838
7839 /*
7840 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
7841 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
7842 * directly as the effective stack pointer.
7843 * (Code structure is very similar to that of PUSH)
7844 */
7845 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7846 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7847 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7848 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7849 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7850 /** @todo can do a better job picking the register here. For cbMem >= 4 this
7851 * will be the resulting register value. */
7852 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
7853
7854 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7855 if (cBitsFlat != 0)
7856 {
7857 Assert(idxRegEffSp == idxRegRsp);
7858 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7859 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7860 }
7861 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7862 {
7863 Assert(idxRegEffSp != idxRegRsp);
7864 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7865 kIemNativeGstRegUse_ReadOnly);
7866#ifdef RT_ARCH_AMD64
7867 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7868#else
7869 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7870#endif
7871 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7872 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7873 offFixupJumpToUseOtherBitSp = off;
7874 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7875 {
7876/** @todo can skip idxRegRsp updating when popping ESP. */
7877 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7878 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7879 }
7880 else
7881 {
7882 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7883 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
7884 }
7885 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7886 }
7887 /* SpUpdateEnd: */
7888 uint32_t const offLabelSpUpdateEnd = off;
7889
7890 /*
 7891     * Okay, now prepare for the TLB lookup and jump to its code (or to TlbMiss if
 7892     * we're skipping the lookup).
7893 */
7894 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7895 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
7896 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7897 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7898 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7899 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7900 : UINT32_MAX;
7901
7902 if (!TlbState.fSkip)
7903 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7904 else
7905 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
7906
7907 /*
7908 * Use16BitSp:
7909 */
7910 if (cBitsFlat == 0)
7911 {
7912#ifdef RT_ARCH_AMD64
7913 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7914#else
7915 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7916#endif
7917 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7918 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7919 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
7920 else
7921 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7922 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7923 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7924 }
7925
7926 /*
7927 * TlbMiss:
7928 *
 7929     * Call helper to do the popping.
7930 */
7931 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7932
7933#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7934 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7935#else
7936 RT_NOREF(idxInstr);
7937#endif
7938
7939 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7940 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7941 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
7942 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7943
7944
7945 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
7946 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7947 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7948
7949 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7950 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7951
7952 /* Done setting up parameters, make the call. */
7953 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7954
7955 /* Move the return register content to idxRegMemResult. */
7956 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
7957 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
7958
7959 /* Restore variables and guest shadow registers to volatile registers. */
7960 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7961 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7962
7963#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7964 if (!TlbState.fSkip)
7965 {
7966 /* end of TlbMiss - Jump to the done label. */
7967 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7968 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7969
7970 /*
7971 * TlbLookup:
7972 */
7973 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
7974 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7975
7976 /*
 7977     * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
7978 */
7979 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7980# ifdef IEM_WITH_TLB_STATISTICS
7981 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7982 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
7983# endif
7984 switch (cbMem)
7985 {
7986 case 2:
7987 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7988 break;
7989 case 4:
7990 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7991 break;
7992 case 8:
7993 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7994 break;
7995 default:
7996 AssertFailed();
7997 }
7998
7999 TlbState.freeRegsAndReleaseVars(pReNative);
8000
8001 /*
8002 * TlbDone:
8003 *
 8004     * Set the new RSP value (FLAT accesses need to calculate it first) and
8005 * commit the popped register value.
8006 */
8007 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8008 }
8009#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8010
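    /*
     * Commit the popped value to the destination register. Popping into the stack
     * pointer itself is handled separately below, since the popped value then ends up
     * in RSP (fully for 64/32-bit pops, merged into the low 16 bits for 16-bit ones).
     */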
8011 if (idxGReg != X86_GREG_xSP)
8012 {
8013 /* Set the register. */
8014 if (cbMem >= sizeof(uint32_t))
8015 {
8016#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8017 AssertMsg( pReNative->idxCurCall == 0
8018 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8019 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8020 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8021#endif
8022 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8023#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8024 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8025#endif
8026#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8027 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8028 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8029#endif
8030 }
8031 else
8032 {
8033 Assert(cbMem == sizeof(uint16_t));
8034 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8035 kIemNativeGstRegUse_ForUpdate);
8036 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8037#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8038 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8039#endif
8040 iemNativeRegFreeTmp(pReNative, idxRegDst);
8041 }
8042
8043 /* Complete RSP calculation for FLAT mode. */
8044 if (idxRegEffSp == idxRegRsp)
8045 {
8046 if (cBitsFlat == 64)
8047 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8048 else
8049 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8050 }
8051 }
8052 else
8053 {
 8054        /* We're popping RSP, ESP or SP. Only the last one requires a bit of extra work, of course. */
8055 if (cbMem == sizeof(uint64_t))
8056 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8057 else if (cbMem == sizeof(uint32_t))
8058 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8059 else
8060 {
8061 if (idxRegEffSp == idxRegRsp)
8062 {
8063 if (cBitsFlat == 64)
8064 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8065 else
8066 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8067 }
8068 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8069 }
8070 }
8071
8072#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8073 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
8074#endif
8075
8076 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8077 if (idxRegEffSp != idxRegRsp)
8078 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8079 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8080
8081 return off;
8082}
8083
8084
8085
8086/*********************************************************************************************************************************
8087* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8088*********************************************************************************************************************************/
8089
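/* Each wrapper below hands iemNativeEmitMemMapCommon the access mode (atomic, RW, WO
   or RO), the natural alignment mask for the type, and the TLB-miss helper that does
   the actual mapping. */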
8090#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8091 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8092 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8093 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8094
8095#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8096 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8097 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8098 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8099
8100#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8101 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8102 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8103 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8104
8105#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8106 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8107 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8108 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8109
8110
8111#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8112 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8113 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8114 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8115
8116#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8117 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8118 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8119 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8120
8121#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8122 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8123 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8124 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8125
8126#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8127 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8128 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8129 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8130
8131#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8132 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
8133 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8134 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8135
8136
8137#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8138 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8139 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8140 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8141
8142#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8143 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8144 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8145 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8146
8147#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8148 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8149 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8150 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8151
8152#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8153 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8154 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8155 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8156
8157#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8158 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
8159 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8160 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8161
8162
8163#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8164 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8165 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8166 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8167
8168#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8169 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8170 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8171 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8172#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8173 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8174 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8175 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8176
8177#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8178 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8179 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8180 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
8181
8182#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8183 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
8184 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8185 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8186
8187
8188#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8189 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8190 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8191 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
8192
8193#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8194 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8195 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8196 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
8197
8198
8199#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8200 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8201 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8202 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
8203
8204#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8205 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8206 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8207 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
8208
8209#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8210 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8211 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8212 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
8213
8214#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8215 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8216 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8217 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
8218
8219
8220
8221#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8222 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8223 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8224 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
8225
8226#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8227 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8228 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8229 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
8230
8231#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8232 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8233 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8234 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
8235
8236#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8237 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8238 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8239 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
8240
8241
8242#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8243 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8244 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8245 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
8246
8247#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8248 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8249 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8250 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
8251
8252#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8253 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8254 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8255 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8256
8257#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8258 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8259 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8260 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
8261
8262#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
8263 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
8264 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8265 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8266
8267
8268#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8269 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8270 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8271 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
8272
8273#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8274 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8275 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8276 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
8277
8278#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8279 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8280 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8281 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8282
8283#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8284 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8285 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8286 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
8287
8288#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
8289 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
8290 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8291 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8292
8293
8294#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8295 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8296 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8297 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
8298
8299#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8300 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8301 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8302 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
8303
8304#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8305 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8306 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8307 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8308
8309#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8310 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8311 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8312 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
8313
8314#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
8315 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
8316 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8317 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8318
8319
8320#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
8321 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8322 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8323 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
8324
8325#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
8326 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8327 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8328 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
8329
8330
8331#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8332 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8333 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8334 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
8335
8336#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8337 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8338 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8339 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
8340
8341#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8342 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8343 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8344 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
8345
8346#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8347 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8348 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8349 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
8350
8351
8352DECL_INLINE_THROW(uint32_t)
8353iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
8354 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint32_t fAlignMaskAndCtl,
8355 uintptr_t pfnFunction, uint8_t idxInstr)
8356{
8357 /*
8358 * Assert sanity.
8359 */
8360 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
8361 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
8362 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
8363 && pVarMem->cbVar == sizeof(void *),
8364 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8365
8366 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8367 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8368 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
8369 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
8370 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8371
8372 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
8373 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
8374 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
8375 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
8376 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8377
8378 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
8379
8380 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
8381
8382#ifdef VBOX_STRICT
8383# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
8384 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
8385 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
8386 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
8387 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
8388# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
8389 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
8390 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
8391 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
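/* The two helpers above pick, from the access flags, the helper function that the
   strict-build assertions below should expect. */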
8392
8393 if (iSegReg == UINT8_MAX)
8394 {
8395 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8396 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8397 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8398 switch (cbMem)
8399 {
8400 case 1:
8401 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8));
8402 Assert(!fAlignMaskAndCtl);
8403 break;
8404 case 2:
8405 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16));
8406 Assert(fAlignMaskAndCtl < 2);
8407 break;
8408 case 4:
8409 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32));
8410 Assert(fAlignMaskAndCtl < 4);
8411 break;
8412 case 8:
8413 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64));
8414 Assert(fAlignMaskAndCtl < 8);
8415 break;
8416 case 10:
8417 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
8418 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
8419 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8420 Assert(fAlignMaskAndCtl < 8);
8421 break;
8422 case 16:
8423 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128));
8424 Assert(fAlignMaskAndCtl < 16);
8425 break;
8426# if 0
8427 case 32:
8428 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256));
8429 Assert(fAlignMaskAndCtl < 32);
8430 break;
8431 case 64:
8432 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512));
8433 Assert(fAlignMaskAndCtl < 64);
8434 break;
8435# endif
8436 default: AssertFailed(); break;
8437 }
8438 }
8439 else
8440 {
8441 Assert(iSegReg < 6);
8442 switch (cbMem)
8443 {
8444 case 1:
8445 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8));
8446 Assert(!fAlignMaskAndCtl);
8447 break;
8448 case 2:
8449 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16));
8450 Assert(fAlignMaskAndCtl < 2);
8451 break;
8452 case 4:
8453 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32));
8454 Assert(fAlignMaskAndCtl < 4);
8455 break;
8456 case 8:
8457 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64));
8458 Assert(fAlignMaskAndCtl < 8);
8459 break;
8460 case 10:
8461 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
8462 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
8463 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8464 Assert(fAlignMaskAndCtl < 8);
8465 break;
8466 case 16:
8467 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128));
8468 Assert(fAlignMaskAndCtl < 16);
8469 break;
8470# if 0
8471 case 32:
8472 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256));
8473 Assert(fAlignMaskAndCtl < 32);
8474 break;
8475 case 64:
8476 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512));
8477 Assert(fAlignMaskAndCtl < 64);
8478 break;
8479# endif
8480 default: AssertFailed(); break;
8481 }
8482 }
8483# undef IEM_MAP_HLP_FN
8484# undef IEM_MAP_HLP_FN_NO_AT
8485#endif
8486
8487#ifdef VBOX_STRICT
8488 /*
8489 * Check that the fExec flags we've got make sense.
8490 */
8491 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8492#endif
8493
8494 /*
8495 * To keep things simple we have to commit any pending writes first as we
8496 * may end up making calls.
8497 */
8498 off = iemNativeRegFlushPendingWrites(pReNative, off);
8499
8500#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8501 /*
8502 * Move/spill/flush stuff out of call-volatile registers.
8503 * This is the easy way out. We could contain this to the tlb-miss branch
8504 * by saving and restoring active stuff here.
8505 */
8506 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
8507 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
8508#endif
8509
8510 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
8511 while the tlb-miss codepath will temporarily put it on the stack.
 8512       Set the type to stack here so we don't need to do it twice below. */
8513 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
8514 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
8515 /** @todo use a tmp register from TlbState, since they'll be free after tlb
8516 * lookup is done. */
8517
8518 /*
8519 * Define labels and allocate the result register (trying for the return
8520 * register if we can).
8521 */
8522 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8523 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
8524 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
8525 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
8526 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
8527 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8528 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8529 : UINT32_MAX;
8530//off=iemNativeEmitBrk(pReNative, off, 0);
8531 /*
8532 * Jump to the TLB lookup code.
8533 */
8534 if (!TlbState.fSkip)
8535 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8536
8537 /*
8538 * TlbMiss:
8539 *
 8540     * Call helper to do the mapping.
8541 * We flush all guest register shadow copies here.
8542 */
8543 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
8544
8545#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8546 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8547#else
8548 RT_NOREF(idxInstr);
8549#endif
8550
8551#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8552 /* Save variables in volatile registers. */
8553 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
8554 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8555#endif
8556
8557 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
8558 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
8559#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8560 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
8561#else
8562 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8563#endif
8564
8565 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
8566 if (iSegReg != UINT8_MAX)
8567 {
8568 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
8569 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
8570 }
8571
8572 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
8573 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
8574 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
8575
8576 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8577 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8578
8579 /* Done setting up parameters, make the call. */
8580 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
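    /* Conceptually the sequence above amounts to a call along the lines of
     *     pvMem = pfnFunction(pVCpu, &bUnmapInfo, GCPtrMem[, iSegReg]);
     * with the mapped address coming back in the return register (a sketch of
     * the calling convention set up here, not an actual helper declaration). */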
8581
8582 /*
8583 * Put the output in the right registers.
8584 */
8585 Assert(idxRegMemResult == pVarMem->idxReg);
8586 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8587 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8588
8589#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8590 /* Restore variables and guest shadow registers to volatile registers. */
8591 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8592 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8593#endif
8594
8595 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
8596 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
8597
8598#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8599 if (!TlbState.fSkip)
8600 {
8601        /* end of tlb-miss - jump to the done label. */
8602 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8603 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8604
8605 /*
8606 * TlbLookup:
8607 */
8608 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl, fAccess,
8609 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8610# ifdef IEM_WITH_TLB_STATISTICS
8611 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
8612 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
8613# endif
8614
8615 /* [idxVarUnmapInfo] = 0; */
8616 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
8617
8618 /*
8619 * TlbDone:
8620 */
8621 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8622
8623 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
8624
8625# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8626 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
8627 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8628# endif
8629 }
8630#else
8631 RT_NOREF(fAccess, fAlignMaskAndCtl, idxLabelTlbMiss);
8632#endif
8633
8634 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8635 iemNativeVarRegisterRelease(pReNative, idxVarMem);
8636
8637 return off;
8638}
8639
8640
8641#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
8642 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
8643 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
8644
8645#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
8646 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
8647 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
8648
8649#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
8650 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
8651 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
8652
8653#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
8654 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
8655 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
8656
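/* For context, these MCs always pair up with one of the IEM_MC_MEM_MAP_XXX
 * statements handled above, along the lines of (illustrative sketch only; the
 * exact map MC and its argument list depend on the instruction):
 *      IEM_MC_MEM_MAP_U32_RW(pu32Dst, bUnmapInfo, iEffSeg, GCPtrEffDst);
 *      ... operate on *pu32Dst ...
 *      IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
 */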
8657DECL_INLINE_THROW(uint32_t)
8658iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
8659 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
8660{
8661 /*
8662 * Assert sanity.
8663 */
8664 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8665#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
8666 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8667#endif
8668 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
8669 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
8670 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
8671#ifdef VBOX_STRICT
8672 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
8673 {
8674 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
8675 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
8676 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
8677 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
8678 case IEM_ACCESS_TYPE_WRITE:
8679 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
8680 case IEM_ACCESS_TYPE_READ:
8681 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
8682 default: AssertFailed();
8683 }
8684#else
8685 RT_NOREF(fAccess);
8686#endif
8687
8688 /*
8689 * To keep things simple we have to commit any pending writes first as we
8690 * may end up making calls (there shouldn't be any at this point, so this
8691 * is just for consistency).
8692 */
8693 /** @todo we could postpone this till we make the call and reload the
8694 * registers after returning from the call. Not sure if that's sensible or
8695 * not, though. */
8696 off = iemNativeRegFlushPendingWrites(pReNative, off);
8697
8698 /*
8699 * Move/spill/flush stuff out of call-volatile registers.
8700 *
8701 * We exclude any register holding the bUnmapInfo variable, as we'll be
8702 * checking it after returning from the call and will free it afterwards.
8703 */
8704 /** @todo save+restore active registers and maybe guest shadows in miss
8705 * scenario. */
8706 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
8707 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
8708
8709 /*
8710     * If the bUnmapInfo value is zero, we can skip all this. Otherwise we'll have
8711     * to call the unmap helper function.
8712     *
8713     * The likelihood of it being zero is higher than that of a TLB hit when doing
8714     * the mapping, as a TLB miss for a well-aligned and unproblematic memory
8715     * access should also end up with a mapping that won't need special unmapping.
8716 */
8717 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
8718 * should speed up things for the pure interpreter as well when TLBs
8719 * are enabled. */
8720#ifdef RT_ARCH_AMD64
8721 if (pVarUnmapInfo->idxReg == UINT8_MAX)
8722 {
8723 /* test byte [rbp - xxx], 0ffh */
8724 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8725 pbCodeBuf[off++] = 0xf6;
8726 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
8727 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
8728 pbCodeBuf[off++] = 0xff;
8729 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8730 }
8731 else
8732#endif
8733 {
8734 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
8735 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
8736 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
8737 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8738 }
8739 uint32_t const offJmpFixup = off;
8740    off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
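    /* The native code for this tail is thus roughly (AMD64 flavour, illustrative):
     *         test    byte [rbp - disp], 0ffh     ; or: test reg8, 0ffh when in a register
     *         jz      .nothing_to_unmap           ; target patched in by the fixup below
     *         ...load arguments and call the unmap helper...
     *     .nothing_to_unmap:
     */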
8741
8742 /*
8743 * Call the unmap helper function.
8744 */
8745#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
8746 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8747#else
8748 RT_NOREF(idxInstr);
8749#endif
8750
8751 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
8752 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
8753 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8754
8755 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8756 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8757
8758 /* Done setting up parameters, make the call. */
8759 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8760
8761    /* The bUnmapInfo variable is implicitly freed by these MCs. */
8762 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
8763
8764 /*
8765 * Done, just fixup the jump for the non-call case.
8766 */
8767 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
8768
8769 return off;
8770}
8771
8772
8773
8774/*********************************************************************************************************************************
8775* State and Exceptions *
8776*********************************************************************************************************************************/
8777
8778#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8779#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8780
8781#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8782#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8783#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8784
8785#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8786#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8787#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8788
8789
8790DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
8791{
8792#ifndef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
8793 RT_NOREF(pReNative, fForChange);
8794#else
8795 if ( !(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED)
8796 && fForChange)
8797 {
8798# ifdef RT_ARCH_AMD64
8799
8800 /* Need to save the host MXCSR the first time, and clear the exception flags. */
8801 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
8802 {
8803 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
8804
8805 /* stmxcsr */
8806 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
8807 pbCodeBuf[off++] = X86_OP_REX_B;
8808 pbCodeBuf[off++] = 0x0f;
8809 pbCodeBuf[off++] = 0xae;
8810 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
8811 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
8812 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
8813 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
8814 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
8815 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8816
8817 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
8818 }
8819
8820 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
8821 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
8822
8823 /*
8824         * Mask all exceptions and clear the exception status flags, then load the
8825         * result into MXCSR, taking a detour through memory here because
8826         * ldmxcsr/stmxcsr don't support a register source/target (sigh).
8827 */
8828 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr);
8829 off = iemNativeEmitOrGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, X86_MXCSR_XCPT_MASK);
8830 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, ~X86_MXCSR_XCPT_FLAGS);
8831 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
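        /* In C terms the value stored to iem.s.uRegMxcsrTmp and loaded into the
         * host MXCSR below is (sketch of the three emits above):
         *     (guest MXCSR | X86_MXCSR_XCPT_MASK) & ~X86_MXCSR_XCPT_FLAGS
         * i.e. all exception types masked and all pending exception flags cleared. */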
8832
8833 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
8834
8835 /* ldmxcsr */
8836 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
8837 pbCodeBuf[off++] = X86_OP_REX_B;
8838 pbCodeBuf[off++] = 0x0f;
8839 pbCodeBuf[off++] = 0xae;
8840 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
8841 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
8842 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
8843 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
8844 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
8845 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8846
8847 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
8848 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8849
8850# elif defined(RT_ARCH_ARM64)
8851 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
8852
8853        /* Need to save the host floating point control register the first time, and clear FPSR. */
8854 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
8855 {
8856 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8857 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);
8858 pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
8859 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
8860 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
8861 }
8862
8863 /*
8864 * Translate MXCSR to FPCR.
8865 *
8866         * Unfortunately we can't emulate the exact behavior of MXCSR as we can't take
8867         * FEAT_AFP on arm64 for granted (my M2 MacBook doesn't have it). So we can't map
8868         * MXCSR.DAZ to FPCR.FIZ and MXCSR.FZ to FPCR.FZ with FPCR.AH being set.
8869         * We can only use FPCR.FZ, which flushes input _and_ output denormals to zero.
8870 */
8871 /** @todo Check the host supported flags (needs additional work to get the host features from CPUM)
8872 * and implement alternate handling if FEAT_AFP is present. */
8873 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
8874
8875 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8876
8877 /* First make sure that there is nothing set for the upper 16-bits (X86_MXCSR_MM, which we don't emulate right now). */
8878 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegTmp, idxRegMxCsr);
8879
8880 /* If either MXCSR.FZ or MXCSR.DAZ is set FPCR.FZ will be set. */
8881 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(IEMNATIVE_REG_FIXED_TMP0, idxRegTmp, X86_MXCSR_DAZ_BIT, 1);
8882 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, X86_MXCSR_FZ_BIT);
8883 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(idxRegTmp, idxRegTmp, IEMNATIVE_REG_FIXED_TMP0);
8884 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegTmp, idxRegTmp, ARMV8_FPCR_FZ_BIT);
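        /* At this point the FPCR value under construction in idxRegTmp has
         * FZ = (MXCSR.FZ | MXCSR.DAZ) at bit ARMV8_FPCR_FZ_BIT and everything
         * else still zero (summary of the four instructions above). */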
8885
8886 /*
8887         * Init the rounding mode; the layout differs between MXCSR.RC[14:13] and FPCR.RMode[23:22]:
8888 *
8889 * Value MXCSR FPCR
8890 * 0 RN RN
8891 * 1 R- R+
8892 * 2 R+ R-
8893 * 3 RZ RZ
8894 *
8895         * Conversion can be achieved by swapping the two bit positions of the field.
8896 */
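        /* Worked example of the table above: guest MXCSR.RC = 01b (round towards
         * -infinity) must become FPCR.RMode = 10b; swapping the two field bits does
         * exactly that, while 00b (RN) and 11b (RZ) stay unchanged. */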
8897 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT);
8898 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 14, 1);
8899 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT + 1);
8900 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 13, 1);
8901
8902 /* Write the value to FPCR. */
8903 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
8904
8905 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8906 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
8907 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8908# else
8909# error "Port me"
8910# endif
8911 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
8912 }
8913#endif
8914 return off;
8915}
8916
8917
8918
8919/*********************************************************************************************************************************
8920* Emitters for FPU related operations. *
8921*********************************************************************************************************************************/
8922
8923#define IEM_MC_FETCH_FCW(a_u16Fcw) \
8924 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
8925
8926/** Emits code for IEM_MC_FETCH_FCW. */
8927DECL_INLINE_THROW(uint32_t)
8928iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
8929{
8930 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8931 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8932
8933 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8934
8935 /* Allocate a temporary FCW register. */
8936 /** @todo eliminate extra register */
8937 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
8938 kIemNativeGstRegUse_ReadOnly);
8939
8940 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
8941
8942 /* Free but don't flush the FCW register. */
8943 iemNativeRegFreeTmp(pReNative, idxFcwReg);
8944 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8945
8946 return off;
8947}
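/* For context, IEM_MC_FETCH_FCW shows up in MC blocks along the lines of the
 * fnstcw m16 implementation (rough sketch only; address calculation and FPU
 * state checks elided):
 *      IEM_MC_LOCAL(uint16_t, u16Fcw);
 *      IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
 *      IEM_MC_FETCH_FCW(u16Fcw);
 *      IEM_MC_STORE_MEM_U16(pVCpu->iem.s.iEffSeg, GCPtrEffDst, u16Fcw);
 */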
8948
8949
8950#define IEM_MC_FETCH_FSW(a_u16Fsw) \
8951 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
8952
8953/** Emits code for IEM_MC_FETCH_FSW. */
8954DECL_INLINE_THROW(uint32_t)
8955iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
8956{
8957 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8958 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8959
8960 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
8961 /* Allocate a temporary FSW register. */
8962 /** @todo eliminate extra register */
8963 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
8964 kIemNativeGstRegUse_ReadOnly);
8965
8966 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
8967
8968 /* Free but don't flush the FSW register. */
8969 iemNativeRegFreeTmp(pReNative, idxFswReg);
8970 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8971
8972 return off;
8973}
8974
8975
8976
8977#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8978
8979
8980/*********************************************************************************************************************************
8981* Emitters for SSE/AVX specific operations. *
8982*********************************************************************************************************************************/
8983
8984#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
8985 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
8986
8987/** Emits code for IEM_MC_COPY_XREG_U128. */
8988DECL_INLINE_THROW(uint32_t)
8989iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
8990{
8991    /* This is a nop if the source and destination registers are the same. */
8992 if (iXRegDst != iXRegSrc)
8993 {
8994 /* Allocate destination and source register. */
8995 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
8996 kIemNativeGstSimdRegLdStSz_Low128,
8997 kIemNativeGstRegUse_ForFullWrite);
8998 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
8999 kIemNativeGstSimdRegLdStSz_Low128,
9000 kIemNativeGstRegUse_ReadOnly);
9001
9002 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9003
9004 /* Free but don't flush the source and destination register. */
9005 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9006 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9007 }
9008
9009 return off;
9010}
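/* For context, IEM_MC_COPY_XREG_U128 is what e.g. the register-to-register forms
 * of movaps/movdqa expand to, roughly (illustrative sketch; argument lists and
 * the surrounding raise/prepare MCs are abbreviated):
 *      IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
 *      IEM_MC_PREPARE_SSE_USAGE();
 *      IEM_MC_COPY_XREG_U128(IEM_GET_MODRM_REG(pVCpu, bRm), IEM_GET_MODRM_RM(pVCpu, bRm));
 *      IEM_MC_ADVANCE_RIP_AND_FINISH();
 */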
9011
9012
9013#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
9014 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
9015
9016/** Emits code for IEM_MC_FETCH_XREG_U128. */
9017DECL_INLINE_THROW(uint32_t)
9018iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
9019{
9020 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9021 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9022
9023 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9024 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9025
9026 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9027
9028 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9029
9030 /* Free but don't flush the source register. */
9031 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9032 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9033
9034 return off;
9035}
9036
9037
9038#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
9039 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
9040
9041#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
9042 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
9043
9044/** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
9045DECL_INLINE_THROW(uint32_t)
9046iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
9047{
9048 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9049 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9050
9051 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9052 kIemNativeGstSimdRegLdStSz_Low128,
9053 kIemNativeGstRegUse_ReadOnly);
9054
9055 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9056 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9057
9058 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9059
9060 /* Free but don't flush the source register. */
9061 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9062 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9063
9064 return off;
9065}
9066
9067
9068#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
9069 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
9070
9071#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
9072 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
9073
9074/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
9075DECL_INLINE_THROW(uint32_t)
9076iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
9077{
9078 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9079 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9080
9081 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9082 kIemNativeGstSimdRegLdStSz_Low128,
9083 kIemNativeGstRegUse_ReadOnly);
9084
9085 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9086 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9087
9088 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9089
9090 /* Free but don't flush the source register. */
9091 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9092 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9093
9094 return off;
9095}
9096
9097
9098#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
9099 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
9100
9101/** Emits code for IEM_MC_FETCH_XREG_U16. */
9102DECL_INLINE_THROW(uint32_t)
9103iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
9104{
9105 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9106 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9107
9108 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9109 kIemNativeGstSimdRegLdStSz_Low128,
9110 kIemNativeGstRegUse_ReadOnly);
9111
9112 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9113 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9114
9115 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
9116
9117 /* Free but don't flush the source register. */
9118 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9119 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9120
9121 return off;
9122}
9123
9124
9125#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
9126 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
9127
9128/** Emits code for IEM_MC_FETCH_XREG_U8. */
9129DECL_INLINE_THROW(uint32_t)
9130iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
9131{
9132 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9133 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
9134
9135 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9136 kIemNativeGstSimdRegLdStSz_Low128,
9137 kIemNativeGstRegUse_ReadOnly);
9138
9139 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9140 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9141
9142 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
9143
9144 /* Free but don't flush the source register. */
9145 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9146 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9147
9148 return off;
9149}
9150
9151
9152#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
9153 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
9154
9155AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
9156#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
9157 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
9158
9159
9160/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9161DECL_INLINE_THROW(uint32_t)
9162iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9163{
9164 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9165 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9166
9167 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9168 kIemNativeGstSimdRegLdStSz_Low128,
9169 kIemNativeGstRegUse_ForFullWrite);
9170 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9171
9172 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9173
9174 /* Free but don't flush the source register. */
9175 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9176 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9177
9178 return off;
9179}
9180
9181
9182#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
9183 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
9184
9185#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
9186 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
9187
9188#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
9189 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
9190
9191#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
9192 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
9193
9194#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
9195 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
9196
9197#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
9198 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
9199
9200/** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8
9200 * as well as IEM_MC_STORE_XREG_R64/IEM_MC_STORE_XREG_R32. */
9201DECL_INLINE_THROW(uint32_t)
9202iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
9203 uint8_t cbLocal, uint8_t iElem)
9204{
9205 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9206 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
9207
9208#ifdef VBOX_STRICT
9209 switch (cbLocal)
9210 {
9211 case sizeof(uint64_t): Assert(iElem < 2); break;
9212 case sizeof(uint32_t): Assert(iElem < 4); break;
9213 case sizeof(uint16_t): Assert(iElem < 8); break;
9214 case sizeof(uint8_t): Assert(iElem < 16); break;
9215 default: AssertFailed();
9216 }
9217#endif
9218
9219 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9220 kIemNativeGstSimdRegLdStSz_Low128,
9221 kIemNativeGstRegUse_ForUpdate);
9222 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9223
9224 switch (cbLocal)
9225 {
9226 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9227 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9228 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9229 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9230 default: AssertFailed();
9231 }
9232
9233 /* Free but don't flush the source register. */
9234 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9235 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9236
9237 return off;
9238}
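/* Example: IEM_MC_STORE_XREG_U64(a_iXReg, 1, a_u64Value) updates only the high
 * qword of the XMM register; the rest of the low 128 bits are preserved because
 * the guest register is allocated ForUpdate, and the upper YMM half is left
 * untouched since only the low 128 bits are loaded and written back here. */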
9239
9240
9241#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
9242 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
9243
9244/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
9245DECL_INLINE_THROW(uint32_t)
9246iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9247{
9248 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9249 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9250
9251 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9252 kIemNativeGstSimdRegLdStSz_Low128,
9253 kIemNativeGstRegUse_ForUpdate);
9254 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9255
9256    /* Zero the vector register first, then store the 64-bit value into the lower 64 bits. */
9257 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9258 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9259
9260 /* Free but don't flush the source register. */
9261 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9262 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9263
9264 return off;
9265}
9266
9267
9268#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
9269 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
9270
9271/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
9272DECL_INLINE_THROW(uint32_t)
9273iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9274{
9275 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9276 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9277
9278 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9279 kIemNativeGstSimdRegLdStSz_Low128,
9280 kIemNativeGstRegUse_ForUpdate);
9281 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9282
9283 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
9284 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9285 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9286
9287 /* Free but don't flush the source register. */
9288 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9289 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9290
9291 return off;
9292}
9293
9294
9295#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
9296 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
9297
9298/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
9299DECL_INLINE_THROW(uint32_t)
9300iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
9301 uint8_t idxSrcVar, uint8_t iDwSrc)
9302{
9303 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9304 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9305
9306 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9307 kIemNativeGstSimdRegLdStSz_Low128,
9308 kIemNativeGstRegUse_ForUpdate);
9309 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9310
9311 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
9312 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
9313
9314 /* Free but don't flush the destination register. */
9315 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9316 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9317
9318 return off;
9319}
9320
9321
9322#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9323 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9324
9325/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
9326DECL_INLINE_THROW(uint32_t)
9327iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9328{
9329 /*
9330 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
9331 * if iYRegDst gets allocated first for the full write it won't load the
9332     * actual value from CPUMCTX. When allocating iYRegSrc afterwards it would get
9333     * duplicated from the host register already allocated for iYRegDst, which
9334     * contains garbage. This would be caught by the guest register value checking
9335     * in debug builds.
9336 */
9337 if (iYRegDst != iYRegSrc)
9338 {
9339 /* Allocate destination and source register. */
9340 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9341 kIemNativeGstSimdRegLdStSz_256,
9342 kIemNativeGstRegUse_ForFullWrite);
9343 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9344 kIemNativeGstSimdRegLdStSz_Low128,
9345 kIemNativeGstRegUse_ReadOnly);
9346
9347 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9348 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9349
9350 /* Free but don't flush the source and destination register. */
9351 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9352 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9353 }
9354 else
9355 {
9356 /* This effectively only clears the upper 128-bits of the register. */
9357 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9358 kIemNativeGstSimdRegLdStSz_High128,
9359 kIemNativeGstRegUse_ForFullWrite);
9360
9361 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9362
9363 /* Free but don't flush the destination register. */
9364 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9365 }
9366
9367 return off;
9368}
9369
9370
9371#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9372 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9373
9374/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
9375DECL_INLINE_THROW(uint32_t)
9376iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9377{
9378 /*
9379 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
9380 * if iYRegDst gets allocated first for the full write it won't load the
9381     * actual value from CPUMCTX. When allocating iYRegSrc afterwards it would get
9382     * duplicated from the host register already allocated for iYRegDst, which
9383     * contains garbage. This would be caught by the guest register value checking
9384     * in debug builds. For iYRegSrc == iYRegDst the copy would effectively only
9385     * clear bits 256 and up of a zmm register, which we don't support yet, so this is just a nop.
9386 */
9387 if (iYRegDst != iYRegSrc)
9388 {
9389 /* Allocate destination and source register. */
9390 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9391 kIemNativeGstSimdRegLdStSz_256,
9392 kIemNativeGstRegUse_ReadOnly);
9393 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9394 kIemNativeGstSimdRegLdStSz_256,
9395 kIemNativeGstRegUse_ForFullWrite);
9396
9397 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9398
9399 /* Free but don't flush the source and destination register. */
9400 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9401 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9402 }
9403
9404 return off;
9405}
9406
9407
9408#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
9409 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
9410
9411/** Emits code for IEM_MC_FETCH_YREG_U128. */
9412DECL_INLINE_THROW(uint32_t)
9413iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
9414{
9415 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9416 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9417
9418 Assert(iDQWord <= 1);
9419 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9420 iDQWord == 1
9421 ? kIemNativeGstSimdRegLdStSz_High128
9422 : kIemNativeGstSimdRegLdStSz_Low128,
9423 kIemNativeGstRegUse_ReadOnly);
9424
9425 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9426 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9427
9428 if (iDQWord == 1)
9429 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9430 else
9431 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9432
9433 /* Free but don't flush the source register. */
9434 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9435 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9436
9437 return off;
9438}
9439
9440
9441#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
9442 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
9443
9444/** Emits code for IEM_MC_FETCH_YREG_U64. */
9445DECL_INLINE_THROW(uint32_t)
9446iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
9447{
9448 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9449 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9450
9451 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9452 iQWord >= 2
9453 ? kIemNativeGstSimdRegLdStSz_High128
9454 : kIemNativeGstSimdRegLdStSz_Low128,
9455 kIemNativeGstRegUse_ReadOnly);
9456
9457 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9458 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9459
9460 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9461
9462 /* Free but don't flush the source register. */
9463 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9464 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9465
9466 return off;
9467}
9468
9469
9470#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
9471 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
9472
9473/** Emits code for IEM_MC_FETCH_YREG_U32. */
9474DECL_INLINE_THROW(uint32_t)
9475iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
9476{
9477 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9478 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9479
9480 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9481 iDWord >= 4
9482 ? kIemNativeGstSimdRegLdStSz_High128
9483 : kIemNativeGstSimdRegLdStSz_Low128,
9484 kIemNativeGstRegUse_ReadOnly);
9485
9486 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9487 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9488
9489 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9490
9491 /* Free but don't flush the source register. */
9492 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9493 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9494
9495 return off;
9496}
9497
9498
9499#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
9500 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
9501
9502/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
9503DECL_INLINE_THROW(uint32_t)
9504iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
9505{
9506 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9507 kIemNativeGstSimdRegLdStSz_High128,
9508 kIemNativeGstRegUse_ForFullWrite);
9509
9510 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9511
9512 /* Free but don't flush the register. */
9513 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9514
9515 return off;
9516}
9517
9518
9519#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
9520 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
9521
9522/** Emits code for IEM_MC_STORE_YREG_U128. */
9523DECL_INLINE_THROW(uint32_t)
9524iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
9525{
9526 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9527 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9528
9529 Assert(iDQword <= 1);
9530 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9531 iDQword == 0
9532 ? kIemNativeGstSimdRegLdStSz_Low128
9533 : kIemNativeGstSimdRegLdStSz_High128,
9534 kIemNativeGstRegUse_ForFullWrite);
9535
9536 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9537
9538 if (iDQword == 0)
9539 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9540 else
9541 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
9542
9543 /* Free but don't flush the source register. */
9544 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9545 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9546
9547 return off;
9548}
9549
9550
9551#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9552 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9553
9554/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
9555DECL_INLINE_THROW(uint32_t)
9556iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9557{
9558 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9559 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9560
9561 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9562 kIemNativeGstSimdRegLdStSz_256,
9563 kIemNativeGstRegUse_ForFullWrite);
9564
9565 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9566
9567 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9568 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9569
9570 /* Free but don't flush the source register. */
9571 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9572 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9573
9574 return off;
9575}
9576
9577
9578#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
9579 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
9580
9581/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
9582DECL_INLINE_THROW(uint32_t)
9583iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9584{
9585 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9586 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9587
9588 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9589 kIemNativeGstSimdRegLdStSz_256,
9590 kIemNativeGstRegUse_ForFullWrite);
9591
9592 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9593
9594 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9595 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9596
9597 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9598 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9599
9600 return off;
9601}
9602
9603
9604#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
9605 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
9606
9607/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
9608DECL_INLINE_THROW(uint32_t)
9609iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9610{
9611 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9612 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9613
9614 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9615 kIemNativeGstSimdRegLdStSz_256,
9616 kIemNativeGstRegUse_ForFullWrite);
9617
9618 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9619
9620 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9621 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9622
9623 /* Free but don't flush the source register. */
9624 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9625 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9626
9627 return off;
9628}
9629
9630
9631#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
9632 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
9633
9634/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
9635DECL_INLINE_THROW(uint32_t)
9636iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9637{
9638 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9639 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9640
9641 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9642 kIemNativeGstSimdRegLdStSz_256,
9643 kIemNativeGstRegUse_ForFullWrite);
9644
9645 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9646
9647 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9648 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9649
9650 /* Free but don't flush the source register. */
9651 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9652 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9653
9654 return off;
9655}
9656
9657
9658#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
9659 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
9660
9661/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
9662DECL_INLINE_THROW(uint32_t)
9663iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9664{
9665 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9666 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9667
9668 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9669 kIemNativeGstSimdRegLdStSz_256,
9670 kIemNativeGstRegUse_ForFullWrite);
9671
9672 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9673
9674 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9675 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9676
9677 /* Free but don't flush the source register. */
9678 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9679 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9680
9681 return off;
9682}
9683
9684
9685#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
9686 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
9687
9688/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
9689DECL_INLINE_THROW(uint32_t)
9690iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9691{
9692 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9693 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9694
9695 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9696 kIemNativeGstSimdRegLdStSz_256,
9697 kIemNativeGstRegUse_ForFullWrite);
9698
9699 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9700
9701 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9702
9703 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9704 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9705
9706 return off;
9707}
9708
9709
9710#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
9711 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
9712
9713/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
9714DECL_INLINE_THROW(uint32_t)
9715iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9716{
9717 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9718 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9719
9720 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9721 kIemNativeGstSimdRegLdStSz_256,
9722 kIemNativeGstRegUse_ForFullWrite);
9723
9724 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9725
9726 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9727
9728 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9729 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9730
9731 return off;
9732}
9733
9734
9735#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9736 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9737
9738/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
9739DECL_INLINE_THROW(uint32_t)
9740iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9741{
9742 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9743 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9744
9745 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9746 kIemNativeGstSimdRegLdStSz_256,
9747 kIemNativeGstRegUse_ForFullWrite);
9748
9749 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9750
9751 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9752
9753 /* Free but don't flush the source register. */
9754 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9755 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9756
9757 return off;
9758}
9759
9760
9761#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9762 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9763
9764/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
9765DECL_INLINE_THROW(uint32_t)
9766iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9767{
9768 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9769 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9770
9771 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9772 kIemNativeGstSimdRegLdStSz_256,
9773 kIemNativeGstRegUse_ForFullWrite);
9774
9775 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9776
9777 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9778
9779 /* Free but don't flush the source register. */
9780 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9781 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9782
9783 return off;
9784}
9785
9786
9787#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9788 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9789
9790/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
9791DECL_INLINE_THROW(uint32_t)
9792iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9793{
9794 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9795 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9796
9797 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9798 kIemNativeGstSimdRegLdStSz_256,
9799 kIemNativeGstRegUse_ForFullWrite);
9800
9801 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9802
9803 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
9804
9805 /* Free but don't flush the source register. */
9806 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9807 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9808
9809 return off;
9810}
9811
9812
9813#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9814 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9815
9816/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
9817DECL_INLINE_THROW(uint32_t)
9818iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9819{
9820 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9821 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9822
9823 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9824 kIemNativeGstSimdRegLdStSz_256,
9825 kIemNativeGstRegUse_ForFullWrite);
9826
9827 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9828
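    /* Zero the whole 256-bit register first, then write the 32-bit value into dword 0, i.e. dst[31:0] = src with everything above cleared. */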
9829 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9830 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
9831
9832 /* Free but don't flush the destination register. */
9833 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9834 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9835
9836 return off;
9837}
9838
9839
9840#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9841 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9842
9843/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
9844DECL_INLINE_THROW(uint32_t)
9845iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9846{
9847 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9848 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9849
9850 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9851 kIemNativeGstSimdRegLdStSz_256,
9852 kIemNativeGstRegUse_ForFullWrite);
9853
9854 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9855
9856 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9857 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
9858
9859 /* Free but don't flush the destination register. */
9860 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9861 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9862
9863 return off;
9864}
9865
9866
9867#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
9868 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
9869
9870/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
9871DECL_INLINE_THROW(uint32_t)
9872iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
9873{
9874 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9875 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9876
9877 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9878 kIemNativeGstSimdRegLdStSz_256,
9879 kIemNativeGstRegUse_ForFullWrite);
9880 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
9881 kIemNativeGstSimdRegLdStSz_Low128,
9882 kIemNativeGstRegUse_ReadOnly);
9883 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9884
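    /* Resulting layout: dst[63:0] = u64Local, dst[127:64] = SrcHx[127:64], dst[255:128] = 0. */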
9885 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
9886 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
9887 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9888
9889 /* Free but don't flush the source and destination registers. */
9890 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
9891 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9892 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9893
9894 return off;
9895}
9896
9897
9898#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
9899 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
9900
9901/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
9902DECL_INLINE_THROW(uint32_t)
9903iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
9904{
9905 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9906 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9907
9908 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9909 kIemNativeGstSimdRegLdStSz_256,
9910 kIemNativeGstRegUse_ForFullWrite);
9911 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
9912 kIemNativeGstSimdRegLdStSz_Low128,
9913 kIemNativeGstRegUse_ReadOnly);
9914 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9915
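    /* Resulting layout: dst[63:0] = SrcHx[63:0], dst[127:64] = u64Local, dst[255:128] = 0. */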
9916 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
9917 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
9918 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9919
9920 /* Free but don't flush the source and destination registers. */
9921 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
9922 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9923 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9924
9925 return off;
9926}
9927
9928
9929#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
9930 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
9931
9932
9933/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
9934DECL_INLINE_THROW(uint32_t)
9935iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
9936{
9937 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9938 kIemNativeGstSimdRegLdStSz_Low128,
9939 kIemNativeGstRegUse_ForUpdate);
9940
9941 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
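    /* Each set bit in bImm8Mask zeroes the corresponding dword of the XMM register, e.g. a mask of 0x3 clears the low 64 bits and leaves dwords 2 and 3 untouched. */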
9942 if (bImm8Mask & RT_BIT(0))
9943 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
9944 if (bImm8Mask & RT_BIT(1))
9945 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
9946 if (bImm8Mask & RT_BIT(2))
9947 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
9948 if (bImm8Mask & RT_BIT(3))
9949 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
9950
9951 /* Free but don't flush the destination register. */
9952 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9953
9954 return off;
9955}
9956
9957
9958#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
9959 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
9960
9961#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
9962 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
9963
9964/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
9965DECL_INLINE_THROW(uint32_t)
9966iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
9967{
9968 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9969 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
9970
9971 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9972 kIemNativeGstSimdRegLdStSz_256,
9973 kIemNativeGstRegUse_ReadOnly);
9974 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9975
9976 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
9977
9978 /* Free but don't flush the source register. */
9979 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9980 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9981
9982 return off;
9983}
9984
9985
9986#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
9987 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
9988
9989#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
9990 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
9991
9992/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
9993DECL_INLINE_THROW(uint32_t)
9994iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
9995{
9996 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9997 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9998
9999 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10000 kIemNativeGstSimdRegLdStSz_256,
10001 kIemNativeGstRegUse_ForFullWrite);
10002 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10003
10004 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
10005
10006 /* Free but don't flush the destination register. */
10007 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10008 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10009
10010 return off;
10011}
10012
10013
10014#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
10015 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
10016
10017
10018/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
10019DECL_INLINE_THROW(uint32_t)
10020iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
10021 uint8_t idxSrcVar, uint8_t iDwSrc)
10022{
10023 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10024 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10025
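    /* Only the 128-bit half containing the destination dword needs to be loaded for update: dwords 0..3 live in the low half, dwords 4..7 in the high half. */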
10026 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10027 iDwDst < 4
10028 ? kIemNativeGstSimdRegLdStSz_Low128
10029 : kIemNativeGstSimdRegLdStSz_High128,
10030 kIemNativeGstRegUse_ForUpdate);
10031 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10032 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10033
10034 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
10035 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
10036
10037 /* Free but don't flush the destination register, and free the temporary. */
10038 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10039 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10040 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10041
10042 return off;
10043}
10044
10045
10046#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
10047 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
10048
10049
10050/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
10051DECL_INLINE_THROW(uint32_t)
10052iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
10053 uint8_t idxSrcVar, uint8_t iQwSrc)
10054{
10055 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10056 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10057
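    /* Only the 128-bit half containing the destination qword needs to be loaded for update: qwords 0..1 live in the low half, qwords 2..3 in the high half. */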
10058 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10059 iQwDst < 2
10060 ? kIemNativeGstSimdRegLdStSz_Low128
10061 : kIemNativeGstSimdRegLdStSz_High128,
10062 kIemNativeGstRegUse_ForUpdate);
10063 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10064 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10065
10066 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
10067 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
10068
10069 /* Free but don't flush the destination register, and free the temporary. */
10070 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10071 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10072 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10073
10074 return off;
10075}
10076
10077
10078#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
10079 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
10080
10081
10082/** Emits code for IEM_MC_STORE_YREG_U64. */
10083DECL_INLINE_THROW(uint32_t)
10084iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
10085{
10086 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10087 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10088
10089 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10090 iQwDst < 2
10091 ? kIemNativeGstSimdRegLdStSz_Low128
10092 : kIemNativeGstSimdRegLdStSz_High128,
10093 kIemNativeGstRegUse_ForUpdate);
10094
10095 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10096
10097 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
10098
10099 /* Free but don't flush the destination register. */
10100 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10101 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10102
10103 return off;
10104}
10105
10106
10107#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
10108 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
10109
10110/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
10111DECL_INLINE_THROW(uint32_t)
10112iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10113{
10114 RT_NOREF(pReNative, iYReg);
10115 /** @todo Needs to be implemented when support for AVX-512 is added. */
10116 return off;
10117}
10118
10119
10120
10121/*********************************************************************************************************************************
10122* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
10123*********************************************************************************************************************************/
10124
10125/**
10126 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
10127 */
10128DECL_INLINE_THROW(uint32_t)
10129iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs, uint8_t idxInstr)
10130{
10131 /* Grab the MXCSR register; it must not be call-volatile or we would end up freeing it when setting up the call below. */
10132 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
10133 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
10134 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
10135
10136#if 0 /* This is not required right now as the called helper will set up the SSE/AVX state if it is an assembly one. */
10137 /*
10138 * Need to do the FPU preparation.
10139 */
10140 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
10141#endif
10142
10143 /*
10144 * Do all the call setup and cleanup.
10145 */
10146 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
10147 false /*fFlushPendingWrites*/);
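    /* Note: the IEM_SSE_AIMPL_HIDDEN_ARGS slot is taken by the MXCSR value loaded into the first argument register below. */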
10148
10149 /*
10150 * Load the MXCSR register into the first argument and mask out the current exception flags.
10151 */
10152 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
10153 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
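    /* The helper thus sees MXCSR with the exception flags cleared; whatever flags it returns are OR'ed back into the guest MXCSR below. */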
10154
10155 /*
10156 * Make the call.
10157 */
10158 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
10159
10160 /*
10161 * The updated MXCSR is in the return register; update the exception status flags.
10162 *
10163 * The return register is marked allocated as a temporary because it is required for the
10164 * exception generation check below.
10165 */
10166 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG)));
10167 uint8_t const idxRegTmp = iemNativeRegMarkAllocated(pReNative, IEMNATIVE_CALL_RET_GREG, kIemNativeWhat_Tmp);
10168 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
10169
10170#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
10171 /* Write back the MXCSR register value (there is no delayed writeback for such registers at the moment). */
10172 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
10173#endif
10174
10175 /*
10176 * Make sure we don't have any outstanding guest register writes as we may
10177 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
10178 */
10179 off = iemNativeRegFlushPendingWrites(pReNative, off);
10180
10181#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10182 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10183#else
10184 RT_NOREF(idxInstr);
10185#endif
10186
10187 /** @todo r=aeichner ANDN from BMI1 would save us a temporary and an additional instruction here, but I don't
10188 * want to assume the existence of this instruction at the moment. */
10189 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
10190
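    /*
     * Detect unmasked exceptions: align the exception mask bits with the flag bits,
     * invert them so a set bit means "unmasked", and AND with the updated MXCSR.
     * Any surviving X86_MXCSR_XCPT_FLAGS bit triggers the RaiseSseAvxFpRelated exit.
     */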
10191 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp2, idxRegTmp);
10192 /* tmp &= X86_MXCSR_XCPT_MASK */
10193 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
10194 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
10195 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
10196 /* tmp = ~tmp */
10197 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
10198 /* tmp &= mxcsr */
10199 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegTmp2);
10200 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
10201 kIemNativeLabelType_RaiseSseAvxFpRelated);
10202
10203 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
10204 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10205 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
10206
10207 return off;
10208}
10209
10210
10211#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
10212 off = iemNativeEmitCallSseAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
10213
10214/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
10215DECL_INLINE_THROW(uint32_t)
10216iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10217{
10218 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10219 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10220 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
10221}
10222
10223
10224#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
10225 off = iemNativeEmitCallSseAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
10226
10227/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
10228DECL_INLINE_THROW(uint32_t)
10229iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
10230 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10231{
10232 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10233 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10234 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10235 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
10236}
10237
10238
10239/*********************************************************************************************************************************
10240* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
10241*********************************************************************************************************************************/
10242
10243#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
10244 off = iemNativeEmitCallAvxAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
10245
10246/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
10247DECL_INLINE_THROW(uint32_t)
10248iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10249{
10250 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10251 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10252 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
10253}
10254
10255
10256#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
10257 off = iemNativeEmitCallAvxAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
10258
10259/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
10260DECL_INLINE_THROW(uint32_t)
10261iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
10262 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10263{
10264 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10265 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10266 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10267 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
10268}
10269
10270
10271#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
10272
10273
10274/*********************************************************************************************************************************
10275* Include instruction emitters. *
10276*********************************************************************************************************************************/
10277#include "target-x86/IEMAllN8veEmit-x86.h"
10278