VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h @ 106123

Last change on this file since 106123 was 106123, checked in by vboxsync, 2 months ago:

VMM/IEM: More work on recompilation-time checks of skipped & postponed EFLAGS - fixed incorrect skipping for variants that clear EFLAGS when advancing RIP. The latter had slipped through and this change reduces the effectiveness of the skipping stuff. :-( bugref:10720

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision

File size: 547.9 KB
1/* $Id: IEMAllN8veRecompFuncs.h 106123 2024-09-23 22:04:30Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused ones.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
82# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
83#endif
84
85
86/*********************************************************************************************************************************
87* Code emitters for flushing pending guest register writes and sanity checks *
88*********************************************************************************************************************************/
89
90#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
91
92# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
93/**
94 * Updates IEMCPU::uPcUpdatingDebug.
95 */
96DECL_INLINE_THROW(uint32_t) iemNativeEmitPcDebugAdd(PIEMRECOMPILERSTATE pReNative, uint32_t off, int64_t offDisp, uint8_t cBits)
97{
98# ifdef RT_ARCH_AMD64
99 if (pReNative->Core.fDebugPcInitialized && cBits >= 32)
100 {
101 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
102 if ((int32_t)offDisp == offDisp || cBits != 64)
103 {
104 /* add [q]word [pVCpu->iem.s.uPcUpdatingDebug], imm32/imm8 */
105 if (cBits == 64)
106 pCodeBuf[off++] = X86_OP_REX_W;
107 pCodeBuf[off++] = (int8_t)offDisp == offDisp ? 0x83 : 0x81;
108 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
109 if ((int8_t)offDisp == offDisp)
110 pCodeBuf[off++] = (int8_t)offDisp;
111 else
112 {
113 *(int32_t *)&pCodeBuf[off] = (int32_t)offDisp;
114 off += sizeof(int32_t);
115 }
116 }
117 else
118 {
119 /* mov tmp0, imm64 */
120 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, offDisp);
121
122 /* add [pVCpu->iem.s.uPcUpdatingDebug], tmp0 */
123 if (cBits == 64)
124 pCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 >= 8 ? X86_OP_REX_R : 0);
125 else if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
126 pCodeBuf[off++] = X86_OP_REX_R;
127 pCodeBuf[off++] = 0x01;
128 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0 & 7,
129 RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
130 }
131 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
132 return off;
133 }
134# endif
135
136 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
137 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 12);
138
139 if (pReNative->Core.fDebugPcInitialized)
140 {
141 Log4(("uPcUpdatingDebug+=%ld cBits=%d off=%#x\n", offDisp, cBits, off));
142 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
143 }
144 else
145 {
146 Log4(("uPcUpdatingDebug=rip+%ld cBits=%d off=%#x\n", offDisp, cBits, off));
147 pReNative->Core.fDebugPcInitialized = true;
148 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
149 }
150
151 if (cBits == 64)
152 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxTmpReg, offDisp, IEMNATIVE_REG_FIXED_TMP0);
153 else
154 {
155 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxTmpReg, (int32_t)offDisp, IEMNATIVE_REG_FIXED_TMP0);
156 if (cBits == 16)
157 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, UINT16_MAX);
158 }
159
160 off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug),
161 IEMNATIVE_REG_FIXED_TMP0);
162
163 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
164 iemNativeRegFreeTmp(pReNative, idxTmpReg);
165 return off;
166}
167
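/*
 * Illustrative note (not from the original sources; the helper name is made up):
 * the AMD64 path above picks the short 'add r/m64, imm8' encoding (opcode 0x83)
 * whenever the displacement survives a round-trip through int8_t, and only falls
 * back to the imm32 form (opcode 0x81) or the mov-imm64-plus-add sequence when
 * it does not:
 *
 *      static uint8_t iemExamplePickAddImmOpcode(int64_t offDisp)
 *      {
 *          if ((int8_t)offDisp == offDisp)
 *              return 0x83;    // add r/m, imm8 (sign-extended)
 *          if ((int32_t)offDisp == offDisp)
 *              return 0x81;    // add r/m, imm32 (sign-extended for 64-bit)
 *          return 0;           // needs 'mov tmp0, imm64' + 'add [mem], tmp0'
 *      }
 */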
168
169# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
170DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
171{
172 /* Compare the shadow with the context value; they should match. */
173 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
174 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
175 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
176 return off;
177}
178# endif
179
180#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
181
182/**
183 * Flushes delayed write of a specific guest register.
184 *
185 * This must be called prior to calling CImpl functions and any helpers that use
186 * the guest state (like raising exceptions) and such.
187 *
188 * This optimization has not yet been implemented. The first target would be
189 * RIP updates, since these are the most common ones.
190 */
191DECL_INLINE_THROW(uint32_t)
192iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
193{
194#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
195 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
196#endif
197
198#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
199#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
200 if ( enmClass == kIemNativeGstRegRef_EFlags
201 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
202 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
203#else
204 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
205#endif
206
207 if ( enmClass == kIemNativeGstRegRef_Gpr
208 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
209 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
210#endif
211
212#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
213 if ( enmClass == kIemNativeGstRegRef_XReg
214 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
215 {
216 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
217 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
218 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
219
220 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
221 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
222 }
223#endif
224 RT_NOREF(pReNative, enmClass, idxReg);
225 return off;
226}
227
228
229
230/*********************************************************************************************************************************
231* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
232*********************************************************************************************************************************/
233
234#undef IEM_MC_BEGIN /* unused */
235#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
236 { \
237 Assert(pReNative->Core.bmVars == 0); \
238 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
239 Assert(pReNative->Core.bmStack == 0); \
240 pReNative->fMc = (a_fMcFlags); \
241 pReNative->fCImpl = (a_fCImplFlags); \
242 pReNative->cArgsX = (a_cArgsIncludingHidden)
243
244/** We have to get to the end in recompilation mode, as otherwise we won't
245 * generate code for all the IEM_MC_IF_XXX branches. */
246#define IEM_MC_END() \
247 iemNativeVarFreeAll(pReNative); \
248 } return off
249
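/*
 * Illustrative sketch (simplified; fMcFlags, fCImplFlags and the argument count
 * are placeholders): after macro expansion an MC block body becomes a
 * brace-delimited scope that ends by returning the updated code buffer offset,
 * which is why IEM_MC_END() must always be reached when recompiling:
 *
 *      // IEM_MC_BEGIN_EX(fMcFlags, fCImplFlags, 2) ... IEM_MC_END() expands to:
 *      {
 *          Assert(pReNative->Core.bmVars == 0);
 *          Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
 *          Assert(pReNative->Core.bmStack == 0);
 *          pReNative->fMc     = fMcFlags;
 *          pReNative->fCImpl  = fCImplFlags;
 *          pReNative->cArgsX  = 2;
 *          // ... emitters for the IEM_MC_XXX statements in between ...
 *          iemNativeVarFreeAll(pReNative);
 *      } return off;
 */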
250
251
252/*********************************************************************************************************************************
253* Liveness Stubs *
254*********************************************************************************************************************************/
255
256#define IEM_MC_LIVENESS_GREG_INPUT(a_iGReg) ((void)0)
257#define IEM_MC_LIVENESS_GREG_CLOBBER(a_iGReg) ((void)0)
258#define IEM_MC_LIVENESS_GREG_MODIFY(a_iGReg) ((void)0)
259
260#define IEM_MC_LIVENESS_MREG_INPUT(a_iMReg) ((void)0)
261#define IEM_MC_LIVENESS_MREG_CLOBBER(a_iMReg) ((void)0)
262#define IEM_MC_LIVENESS_MREG_MODIFY(a_iMReg) ((void)0)
263
264#define IEM_MC_LIVENESS_XREG_INPUT(a_iXReg) ((void)0)
265#define IEM_MC_LIVENESS_XREG_CLOBBER(a_iXReg) ((void)0)
266#define IEM_MC_LIVENESS_XREG_MODIFY(a_iXReg) ((void)0)
267
268#define IEM_MC_LIVENESS_MXCSR_INPUT() ((void)0)
269#define IEM_MC_LIVENESS_MXCSR_CLOBBER() ((void)0)
270#define IEM_MC_LIVENESS_MXCSR_MODIFY() ((void)0)
271
272
273/*********************************************************************************************************************************
274* Native Emitter Support. *
275*********************************************************************************************************************************/
276
277#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
278
279#define IEM_MC_NATIVE_ELSE() } else {
280
281#define IEM_MC_NATIVE_ENDIF() } ((void)0)
282
283
284#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
285 off = a_fnEmitter(pReNative, off)
286
287#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
288 off = a_fnEmitter(pReNative, off, (a0))
289
290#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
291 off = a_fnEmitter(pReNative, off, (a0), (a1))
292
293#define IEM_MC_NATIVE_EMIT_2_EX(a_fnEmitter, a0, a1) \
294 off = a_fnEmitter(pReNative, off, pCallEntry->idxInstr, (a0), (a1))
295
296#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
297 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
298
299#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
300 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
301
302#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
303 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
304
305#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
306 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
307
308#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
309 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
310
311#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
312 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
313
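/*
 * Illustrative sketch (the worker names and variables are made up): in the
 * generated MC blocks these macros select between a native emitter and the
 * generic fallback at recompile time, roughly like this:
 *
 *      IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
 *          IEM_MC_NATIVE_EMIT_2(iemNativeEmitExampleWorker, uDst, uSrc);
 *      IEM_MC_NATIVE_ELSE()
 *          IEM_MC_CALL_VOID_AIMPL_2(pfnExampleFallback, pDst, uSrc);
 *      IEM_MC_NATIVE_ENDIF();
 *
 * The IF/ELSE/ENDIF trio expands to a plain if/else on RT_ARCH_VAL, so only the
 * branch matching the host we are recompiling on actually emits code.
 */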
314
315#ifndef RT_ARCH_AMD64
316# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
317#else
318/** @note This is a naive approach that ASSUMES that the register isn't
319 * allocated, so it only works safely for the first allocation(s) in
320 * an MC block. */
321# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
322 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
323
324DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg,
325 uint32_t off, bool fAllocated);
326
327DECL_INLINE_THROW(uint32_t)
328iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
329{
330 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
331 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
332 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
333
334# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
335 /* Must flush the register if it holds pending writes. */
336 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
337 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
338 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
339# endif
340
341 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off, false /*fAllocated*/);
342 return off;
343}
344
345#endif /* RT_ARCH_AMD64 */
346
347
348
349/*********************************************************************************************************************************
350* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
351*********************************************************************************************************************************/
352
353#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
354 pReNative->fMc = 0; \
355 pReNative->fCImpl = (a_fFlags); \
356 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
357 a_cbInstr) /** @todo not used ... */
358
359
360#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
361 pReNative->fMc = 0; \
362 pReNative->fCImpl = (a_fFlags); \
363 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
364
365DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
366 uint8_t idxInstr, uint64_t a_fGstShwFlush,
367 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
368{
369 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
370}
371
372
373#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
374 pReNative->fMc = 0; \
375 pReNative->fCImpl = (a_fFlags); \
376 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
377 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
378
379DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
380 uint8_t idxInstr, uint64_t a_fGstShwFlush,
381 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
382{
383 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
384}
385
386
387#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
388 pReNative->fMc = 0; \
389 pReNative->fCImpl = (a_fFlags); \
390 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
391 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
392
393DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
394 uint8_t idxInstr, uint64_t a_fGstShwFlush,
395 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
396 uint64_t uArg2)
397{
398 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
399}
400
401
402
403/*********************************************************************************************************************************
404* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
405*********************************************************************************************************************************/
406
407/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
408 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
409DECL_INLINE_THROW(uint32_t)
410iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
411{
412 /*
413 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
414 * return with a special status code and make the execution loop deal with
415 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
416 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
417 * could continue w/o interruption, it probably will drop into the
418 * debugger, so it is not worth the effort of trying to service it here and we
419 * just lump it in with the handling of the others.
420 *
421 * To simplify the code and the register state management even more (wrt
422 * immediate in the AND operation), we always update the flags and skip the
423 * conditional jump associated with the extra check; see the example below.
424 */
425 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
426 <= UINT32_MAX);
427#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
428 AssertMsg( pReNative->idxCurCall == 0
429 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
430 IEMLIVENESSBIT_IDX_EFL_OTHER)),
431 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
432 IEMLIVENESSBIT_IDX_EFL_OTHER)));
433#endif
434
435 /*
436 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
437 * any pending register writes must be flushed.
438 */
439 off = iemNativeRegFlushPendingWrites(pReNative, off);
440
441 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ForUpdate,
442 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER),
443 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER));
444 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxEflReg,
445 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
446 kIemNativeLabelType_ReturnWithFlags);
447 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
448 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
449
450 /* Free but don't flush the EFLAGS register. */
451 iemNativeRegFreeTmp(pReNative, idxEflReg);
452
453 return off;
454}
455
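/*
 * Worked example (plain C rendering of the code emitted above, mirroring
 * iemRegFinishClearingRF; the status value name is illustrative only):
 *
 *      uint32_t fEfl = pVCpu->cpum.GstCtx.eflags.u;
 *      if (fEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
 *          return rcReturnWithFlags;   // leave the TB and let the run loop deal with it
 *      fEfl &= ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
 *      pVCpu->cpum.GstCtx.eflags.u = fEfl;
 *      // fall through: continue with the next instruction in the TB
 */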
456
457/** Helper for iemNativeEmitFinishInstructionWithStatus. */
458DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
459{
460 unsigned const offOpcodes = pCallEntry->offOpcode;
461 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
462 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
463 {
464 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
465 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
466 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
467 }
468 AssertFailedReturn(NIL_RTGCPHYS);
469}
470
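/*
 * Worked example for the range lookup above (numbers made up): assume two
 * opcode ranges
 *      pTb->aRanges[0]: offOpcodes = 0,  cbOpcodes = 10, offPhysPage = 0xff6
 *      pTb->aRanges[1]: offOpcodes = 10, cbOpcodes = 20, offPhysPage = 0x000
 * and a call entry with offOpcode = 14.  Range 0 is skipped (14 - 0 = 14 is not
 * below 10); range 1 matches with offRange = 14 - 10 = 4, so the helper returns
 * iemTbGetRangePhysPageAddr(pTb, 1) + 4 + 0x000, i.e. the guest physical
 * address four bytes into the page backing the second range.
 */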
471
472/** The VINF_SUCCESS dummy. */
473template<int const a_rcNormal, bool const a_fIsJump>
474DECL_FORCE_INLINE_THROW(uint32_t)
475iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
476 int32_t const offJump)
477{
478 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
479 if (a_rcNormal != VINF_SUCCESS)
480 {
481#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
482 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
483#else
484 RT_NOREF_PV(pCallEntry);
485#endif
486
487 /* As this code returns from the TB, any pending register writes must be flushed. */
488 off = iemNativeRegFlushPendingWrites(pReNative, off);
489
490 /*
491 * If we're in a conditional, mark the current branch as exiting so we
492 * can disregard its state when we hit the IEM_MC_ENDIF.
493 */
494 iemNativeMarkCurCondBranchAsExiting(pReNative);
495
496 /*
497 * Use the lookup table for getting to the next TB quickly.
498 * Note! In this code path there can only be one entry at present.
499 */
500 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
501 PCIEMTB const pTbOrg = pReNative->pTbOrg;
502 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
503 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
504
505#if 0
506 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
507 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
508 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
509 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
510 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
511
512 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
513
514#else
515 /* Load the index as argument #1 for the helper call at the given label. */
516 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
517
518 /*
519 * Figure out the physical address of the current instruction and see
520 * whether the next instruction we're about to execute is in the same
521 * page, so we can optimistically skip TLB loading.
522 *
523 * - This is safe for all cases in FLAT mode.
524 * - In segmented modes it is complicated, given that a negative
525 * jump may underflow EIP and a forward jump may overflow or run into
526 * CS.LIM, triggering a #GP. The only thing we can get away with
527 * now at compile time is forward jumps w/o CS.LIM checks, since the
528 * lack of CS.LIM checks means we're good for the entire physical page
529 * we're executing on and another 15 bytes before we run into CS.LIM.
530 */
531 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
532# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
533 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
534# endif
535 )
536 {
537 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
538 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
539 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
540 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
541
542 {
543 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
544 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
545
546 /* Load the key lookup flags into the 2nd argument for the helper call.
547 - This is safe wrt CS limit checking since we're only here for FLAT modes.
548 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
549 interrupt shadow.
550 - The NMI inhibiting is more questionable, though... */
551 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
552 * Should we copy it into fExec to simplify this? OTOH, it's just a
553 * couple of extra instructions if EFLAGS are already in a register. */
554 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
555 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
556
557 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
558 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookup);
559 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithIrq);
560 }
561 }
562 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
563 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlb);
564 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq);
565#endif
566 }
567 return off;
568}
569
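/*
 * Worked example for the same-page test above (addresses made up, 4 KiB guest
 * pages): a 2-byte 'je -56h' at GCPhysPcCurrent = 0x7fffd gives
 * GCPhysPcNext = 0x7fffd + 2 - 0x56 = 0x7ffa9.  Both addresses shift down to
 * page frame 0x7f, and 0x1000 - 0xffd = 3 >= 2, so neither the jump target nor
 * the opcode bytes leave the current page and the TLB-less lookup exit can be
 * used.  Had the instruction started at offset 0xfff into the page, the second
 * check (0x1000 - 0xfff = 1 < 2) would fail and the WithTlb exit would be taken.
 */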
570
571#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
572 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
573 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
574
575#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
576 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
577 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
578 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
579
580/** Same as iemRegAddToRip64AndFinishingNoFlags. */
581DECL_INLINE_THROW(uint32_t)
582iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
583{
584#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
585# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
586 if (!pReNative->Core.offPc)
587 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
588# endif
589
590 /* Allocate a temporary PC register. */
591 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
592
593 /* Perform the addition and store the result. */
594 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
595 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
596
597 /* Free but don't flush the PC register. */
598 iemNativeRegFreeTmp(pReNative, idxPcReg);
599#endif
600
601#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
602 pReNative->Core.offPc += cbInstr;
603 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
604# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
605 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 64);
606 off = iemNativeEmitPcDebugCheck(pReNative, off);
607# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
608 off = iemNativePcAdjustCheck(pReNative, off);
609# endif
610 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
611#endif
612
613 return off;
614}
615
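/*
 * Illustrative sketch of the delayed PC updating above (simplified): with
 * IEMNATIVE_WITH_DELAYED_PC_UPDATING the emitter does not store RIP at all, it
 * only bumps the compile-time accumulator, so a straight-line run of
 * instructions pays for a single RIP write when the value is finally needed
 * (exception, TB exit, indirect jump, and the like):
 *
 *      // 3-byte instruction:  offPc = 3, no host code emitted for RIP
 *      // 2-byte instruction:  offPc = 5, no host code emitted for RIP
 *      // flush point:         'rip += 5' is emitted once and offPc reset to 0
 */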
616
617#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
618 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
619 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
620
621#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
622 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
623 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
624 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
625
626/** Same as iemRegAddToEip32AndFinishingNoFlags. */
627DECL_INLINE_THROW(uint32_t)
628iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
629{
630#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
631# ifdef IEMNATIVE_REG_FIXED_PC_DBG
632 if (!pReNative->Core.offPc)
633 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
634# endif
635
636 /* Allocate a temporary PC register. */
637 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
638
639 /* Perform the addition and store the result. */
640 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
641 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
642
643 /* Free but don't flush the PC register. */
644 iemNativeRegFreeTmp(pReNative, idxPcReg);
645#endif
646
647#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
648 pReNative->Core.offPc += cbInstr;
649 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
650# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
651 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 32);
652 off = iemNativeEmitPcDebugCheck(pReNative, off);
653# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
654 off = iemNativePcAdjustCheck(pReNative, off);
655# endif
656 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
657#endif
658
659 return off;
660}
661
662
663#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
664 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
665 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
666
667#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
668 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
669 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
670 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
671
672/** Same as iemRegAddToIp16AndFinishingNoFlags. */
673DECL_INLINE_THROW(uint32_t)
674iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
675{
676#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
677# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
678 if (!pReNative->Core.offPc)
679 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
680# endif
681
682 /* Allocate a temporary PC register. */
683 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
684
685 /* Perform the addition and store the result. */
686 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
687 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
688 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
689
690 /* Free but don't flush the PC register. */
691 iemNativeRegFreeTmp(pReNative, idxPcReg);
692#endif
693
694#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
695 pReNative->Core.offPc += cbInstr;
696 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
697# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
698 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 16);
699 off = iemNativeEmitPcDebugCheck(pReNative, off);
700# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
701 off = iemNativePcAdjustCheck(pReNative, off);
702# endif
703 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
704#endif
705
706 return off;
707}
708
709
710/*********************************************************************************************************************************
711* Common code for changing PC/RIP/EIP/IP. *
712*********************************************************************************************************************************/
713
714/**
715 * Emits code to check if the content of @a idxAddrReg is a canonical address,
716 * raising a \#GP(0) if it isn't.
717 *
718 * @returns New code buffer offset, UINT32_MAX on failure.
719 * @param pReNative The native recompile state.
720 * @param off The code buffer offset.
721 * @param idxAddrReg The host register with the address to check.
722 * @param idxInstr The current instruction.
723 */
724DECL_FORCE_INLINE_THROW(uint32_t)
725iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
726{
727 /*
728 * Make sure we don't have any outstanding guest register writes as we may
729 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
730 */
731 off = iemNativeRegFlushPendingWrites(pReNative, off);
732
733#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
734 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
735#else
736 RT_NOREF(idxInstr);
737#endif
738
739#ifdef RT_ARCH_AMD64
740 /*
741 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
742 * return raisexcpt();
743 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
744 */
745 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
746
747 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
748 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
749 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
750 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
751 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
752
753 iemNativeRegFreeTmp(pReNative, iTmpReg);
754
755#elif defined(RT_ARCH_ARM64)
756 /*
757 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
758 * return raisexcpt();
759 * ----
760 * mov x1, 0x800000000000
761 * add x1, x0, x1
762 * cmp xzr, x1, lsr 48
763 * b.ne .Lraisexcpt
764 */
765 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
766
767 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
768 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
769 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
770 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
771
772 iemNativeRegFreeTmp(pReNative, iTmpReg);
773
774#else
775# error "Port me"
776#endif
777 return off;
778}
779
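/*
 * Worked example (the helper name is made up): written as plain C, the AMD64
 * variant above is the usual 48-bit canonical-address test, just arranged so
 * that no 64-bit immediate is needed:
 *
 *      static bool iemExampleIsCanonical(uint64_t uAddr)
 *      {
 *          return (((uint32_t)(uAddr >> 32) + UINT32_C(0x8000)) >> 16) == 0;
 *      }
 *
 * 0x00007fffffffffff and 0xffff800000000000 pass (the high-dword sums come out
 * as 0xffff and, wrapping, 0x0), while 0x0000800000000000 gives
 * 0x8000 + 0x8000 = 0x10000, so the shifted result is non-zero and the emitted
 * jnz takes the RaiseGp0 exit.
 */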
780
781/**
782 * Emits code to check if the content of @a idxAddrReg is a canonical address,
783 * raising a \#GP(0) if it isn't.
784 *
785 * Caller makes sure everything is flushed, except maybe PC.
786 *
787 * @returns New code buffer offset, UINT32_MAX on failure.
788 * @param pReNative The native recompile state.
789 * @param off The code buffer offset.
790 * @param idxAddrReg The host register with the address to check.
791 * @param offDisp The relative displacement that has already been
792 * added to idxAddrReg and must be subtracted if
793 * raising a \#GP(0).
794 * @param idxInstr The current instruction.
795 */
796DECL_FORCE_INLINE_THROW(uint32_t)
797iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
798 uint8_t idxAddrReg, int64_t offDisp, uint8_t idxInstr)
799{
800#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
801 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
802#endif
803
804#ifdef RT_ARCH_AMD64
805 /*
806 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
807 * return raisexcpt();
808 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
809 */
810 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
811
812 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
813 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
814 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
815 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
816
817#elif defined(RT_ARCH_ARM64)
818 /*
819 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
820 * return raisexcpt();
821 * ----
822 * mov x1, 0x800000000000
823 * add x1, x0, x1
824 * cmp xzr, x1, lsr 48
825 * b.ne .Lraisexcpt
826 */
827 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
828
829 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
830 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
831 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
832#else
833# error "Port me"
834#endif
835
836 /* Jump to the #GP code (hoping static prediction considers forward branches as not-taken). */
837 uint32_t const offFixup1 = off;
838 off = iemNativeEmitJnzToFixed(pReNative, off, off /*8-bit jump suffices*/);
839
840 /* jump .Lnoexcept; Skip the #GP code. */
841 uint32_t const offFixup2 = off;
842 off = iemNativeEmitJmpToFixed(pReNative, off, off /*8-bit jump suffices*/);
843
844 /* .Lraisexcpt: */
845 iemNativeFixupFixedJump(pReNative, offFixup1, off);
846#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
847 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
848#else
849 RT_NOREF(idxInstr);
850#endif
851
852 /* Undo the PC adjustment and store the old PC value. */
853 off = iemNativeEmitSubGprImm(pReNative, off, idxAddrReg, offDisp, iTmpReg);
854 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxAddrReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
855
856 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0, false /*fActuallyExitingTb*/);
857
858 /* .Lnoexcept: */
859 iemNativeFixupFixedJump(pReNative, offFixup2, off);
860
861 iemNativeRegFreeTmp(pReNative, iTmpReg);
862 return off;
863}
864
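/*
 * Illustrative control-flow sketch of the code emitted above (pseudo assembly,
 * labels as in the comments): two forward jumps are emitted with placeholder
 * targets and patched via iemNativeFixupFixedJump once the real offsets are
 * known:
 *
 *      <canonical test from above>
 *      jnz  .Lraisexcpt            ; offFixup1, patched later
 *      jmp  .Lnoexcept             ; offFixup2, patched later
 *  .Lraisexcpt:
 *      sub  addr_reg, offDisp      ; undo the displacement
 *      mov  [cpum.GstCtx.rip], addr_reg
 *      jmp  RaiseGp0               ; TB exit code
 *  .Lnoexcept:
 *      ...                         ; normal path continues here
 */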
865
866/**
867 * Emits code to check if the content of @a idxAddrReg is a canonical address,
868 * raising a \#GP(0) if it isn't.
869 *
870 * Caller makes sure everything is flushed, except maybe PC.
871 *
872 * @returns New code buffer offset, UINT32_MAX on failure.
873 * @param pReNative The native recompile state.
874 * @param off The code buffer offset.
875 * @param idxAddrReg The host register with the address to check.
876 * @param idxOldPcReg Register holding the old PC that offPc is relative
877 * to if available, otherwise UINT8_MAX.
878 * @param idxInstr The current instruction.
879 */
880DECL_FORCE_INLINE_THROW(uint32_t)
881iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
882 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
883{
884#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
885 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
886#endif
887
888#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
889# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
890 if (!pReNative->Core.offPc)
891# endif
892 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
893#else
894 RT_NOREF(idxInstr);
895#endif
896
897#ifdef RT_ARCH_AMD64
898 /*
899 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
900 * return raisexcpt();
901 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
902 */
903 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
904
905 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
906 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
907 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
908 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
909
910#elif defined(RT_ARCH_ARM64)
911 /*
912 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
913 * return raisexcpt();
914 * ----
915 * mov x1, 0x800000000000
916 * add x1, x0, x1
917 * cmp xzr, x1, lsr 48
918 * b.ne .Lraisexcpt
919 */
920 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
921
922 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
923 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
924 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
925#else
926# error "Port me"
927#endif
928
929#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
930 if (pReNative->Core.offPc)
931 {
932 /** @todo On x86, it is said that conditional jumps forward are statically
933 * predicted as not taken, so this isn't a very good construct.
934 * Investigate whether it makes sense to invert it and add another
935 * jump. Also, find out wtf the static predictor does here on arm! */
936 uint32_t const offFixup = off;
937 off = iemNativeEmitJzToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
938
939 /* .Lraisexcpt: */
940# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
941 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
942# endif
943 /* We need to update cpum.GstCtx.rip. */
944 if (idxOldPcReg == UINT8_MAX)
945 {
946 idxOldPcReg = iTmpReg;
947 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
948 }
949 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
950 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
951
952 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0, false /*fActuallyExitingTb*/);
953 iemNativeFixupFixedJump(pReNative, offFixup, off);
954 }
955 else
956#endif
957 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
958
959 iemNativeRegFreeTmp(pReNative, iTmpReg);
960
961 return off;
962}
963
964
965/**
966 * Emits code to check that the content of @a idxAddrReg is within the limit
967 * of CS, raising a \#GP(0) if it isn't.
968 *
969 * @returns New code buffer offset; throws VBox status code on error.
970 * @param pReNative The native recompile state.
971 * @param off The code buffer offset.
972 * @param idxAddrReg The host register (32-bit) with the address to
973 * check.
974 * @param idxInstr The current instruction.
975 */
976DECL_FORCE_INLINE_THROW(uint32_t)
977iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
978 uint8_t idxAddrReg, uint8_t idxInstr)
979{
980 /*
981 * Make sure we don't have any outstanding guest register writes as we may
982 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
983 */
984 off = iemNativeRegFlushPendingWrites(pReNative, off);
985
986#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
987 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
988#else
989 RT_NOREF(idxInstr);
990#endif
991
992 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
993 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
994 kIemNativeGstRegUse_ReadOnly);
995
996 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
997 off = iemNativeEmitJaTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
998
999 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1000 return off;
1001}
1002
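/*
 * Illustrative sketch (plain C rendering of the cmp + ja pair emitted above;
 * the uNewEip variable name is made up): the 32-bit target address is fine as
 * long as it does not exceed the CS limit, and the unsigned compare makes
 * wrapped "negative" values fail as well:
 *
 *      if (uNewEip > pVCpu->cpum.GstCtx.cs.u32Limit)
 *          return iemRaiseGeneralProtectionFault0(pVCpu);   // -> RaiseGp0 TB exit
 */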
1003
1004
1005
1006/**
1007 * Emits code to check that the content of @a idxAddrReg is within the limit
1008 * of CS, raising a \#GP(0) if it isn't.
1009 *
1010 * Caller makes sure everything is flushed, except maybe PC.
1011 *
1012 * @returns New code buffer offset; throws VBox status code on error.
1013 * @param pReNative The native recompile state.
1014 * @param off The code buffer offset.
1015 * @param idxAddrReg The host register (32-bit) with the address to
1016 * check.
1017 * @param idxOldPcReg Register holding the old PC that offPc is relative
1018 * to if available, otherwise UINT8_MAX.
1019 * @param idxInstr The current instruction.
1020 */
1021DECL_FORCE_INLINE_THROW(uint32_t)
1022iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1023 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
1024{
1025#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1026 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1027#endif
1028
1029#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1030# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1031 if (!pReNative->Core.offPc)
1032# endif
1033 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1034#else
1035 RT_NOREF(idxInstr);
1036#endif
1037
1038 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
1039 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
1040 kIemNativeGstRegUse_ReadOnly);
1041
1042 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
1043#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1044 if (pReNative->Core.offPc)
1045 {
1046 uint32_t const offFixup = off;
1047 off = iemNativeEmitJbeToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
1048
1049 /* Raising a GP(0), but first we need to update cpum.GstCtx.rip. */
1050 if (idxOldPcReg == UINT8_MAX)
1051 {
1052 idxOldPcReg = idxAddrReg;
1053 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1054 }
1055 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
1056 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1057# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1058 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1059# endif
1060 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0, false /*fActuallyExitingTb*/);
1061 iemNativeFixupFixedJump(pReNative, offFixup, off);
1062 }
1063 else
1064#endif
1065 off = iemNativeEmitJaTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
1066
1067 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1068 return off;
1069}
1070
1071
1072/*********************************************************************************************************************************
1073* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
1074*********************************************************************************************************************************/
1075
1076#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1077 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1078 (a_enmEffOpSize), pCallEntry->idxInstr); \
1079 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1080
1081#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1082 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1083 (a_enmEffOpSize), pCallEntry->idxInstr); \
1084 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1085 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1086
1087#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
1088 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1089 IEMMODE_16BIT, pCallEntry->idxInstr); \
1090 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1091
1092#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1093 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1094 IEMMODE_16BIT, pCallEntry->idxInstr); \
1095 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1096 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1097
1098#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
1099 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1100 IEMMODE_64BIT, pCallEntry->idxInstr); \
1101 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1102
1103#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1104 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1105 IEMMODE_64BIT, pCallEntry->idxInstr); \
1106 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1107 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1108
1109
1110#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1111 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1112 (a_enmEffOpSize), pCallEntry->idxInstr); \
1113 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1114
1115#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1116 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1117 (a_enmEffOpSize), pCallEntry->idxInstr); \
1118 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1119 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1120
1121#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG(a_i16, a_cbInstr, a_rcNormal) \
1122 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1123 IEMMODE_16BIT, pCallEntry->idxInstr); \
1124 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1125
1126#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1127 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1128 IEMMODE_16BIT, pCallEntry->idxInstr); \
1129 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1130 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1131
1132#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG(a_i32, a_cbInstr, a_rcNormal) \
1133 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1134 IEMMODE_64BIT, pCallEntry->idxInstr); \
1135 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1136
1137#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1138 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1139 IEMMODE_64BIT, pCallEntry->idxInstr); \
1140 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1141 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1142
1143/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
1144 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
1145 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
1146template<bool const a_fWithinPage>
1147DECL_INLINE_THROW(uint32_t)
1148iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1149 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1150{
1151 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
1152#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1153 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1154 if (a_fWithinPage && enmEffOpSize == IEMMODE_64BIT)
1155 {
1156 /* No #GP checking required, just update offPc and get on with it. */
1157 pReNative->Core.offPc += (int64_t)offDisp + cbInstr;
1158# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1159 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1160# endif
1161 }
1162 else
1163#endif
1164 {
1165 /* Flush all but PC iff we're doing a 64-bit update here and this isn't within a page. */
1166 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT && !a_fWithinPage))
1167 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1168
1169 /* Allocate a temporary PC register. */
1170 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1171 kIemNativeGstRegUse_ForUpdate);
1172
1173 /* Perform the addition. */
1174 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr + pReNative->Core.offPc);
1175
1176 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
1177 {
1178 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't.
1179 We can skip this if the target is within the same page. */
1180 if (!a_fWithinPage)
1181 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(pReNative, off, idxPcReg,
1182 (int64_t)offDisp + cbInstr, idxInstr);
1183 }
1184 else
1185 {
1186 /* Just truncate the result to 16-bit IP. */
1187 Assert(enmEffOpSize == IEMMODE_16BIT);
1188 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1189 }
1190
1191#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1192# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1193 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1194 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1195# endif
1196 /* Since we've already got the new PC value in idxPcReg, we can just as
1197 well write it out and reset offPc to zero. Otherwise, we'd need to use
1198 a copy of the shadow PC, which will cost another move instruction here. */
1199# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS)
1200 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
1201 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
1202 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
1203 Log4(("iemNativeEmitRip64RelativeJumpAndFinishingNoFlags: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u cCondDepth=%d\n",
1204 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped, pReNative->cCondDepth));
1205 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
1206# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1207 iemNativeDbgInfoAddNativeOffset(pReNative, off);
1208 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
1209# endif
1210# endif
1211 pReNative->Core.offPc = 0;
1212#endif
1213
1214 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1215
1216 /* Free but don't flush the PC register. */
1217 iemNativeRegFreeTmp(pReNative, idxPcReg);
1218 }
1219 return off;
1220}
1221
1222
1223#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1224 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1225 (a_enmEffOpSize), pCallEntry->idxInstr); \
1226 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1227
1228#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1229 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1230 (a_enmEffOpSize), pCallEntry->idxInstr); \
1231 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1232 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1233
1234#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
1235 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1236 IEMMODE_16BIT, pCallEntry->idxInstr); \
1237 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1238
1239#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1240 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1241 IEMMODE_16BIT, pCallEntry->idxInstr); \
1242 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1243 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1244
1245#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
1246 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1247 IEMMODE_32BIT, pCallEntry->idxInstr); \
1248 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1249
1250#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1251 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1252 IEMMODE_32BIT, pCallEntry->idxInstr); \
1253 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1254 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1255
1256
1257#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1258 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1259 (a_enmEffOpSize), pCallEntry->idxInstr); \
1260 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1261
1262#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1263 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1264 (a_enmEffOpSize), pCallEntry->idxInstr); \
1265 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1266 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1267
1268#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT(a_i16, a_cbInstr, a_rcNormal) \
1269 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1270 IEMMODE_16BIT, pCallEntry->idxInstr); \
1271 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1272
1273#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1274 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1275 IEMMODE_16BIT, pCallEntry->idxInstr); \
1276 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1277 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1278
1279#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT(a_i32, a_cbInstr, a_rcNormal) \
1280 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1281 IEMMODE_32BIT, pCallEntry->idxInstr); \
1282 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1283
1284#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1285 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1286 IEMMODE_32BIT, pCallEntry->idxInstr); \
1287 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1288 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1289
1290/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
1291 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
1292 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
1293template<bool const a_fFlat>
1294DECL_INLINE_THROW(uint32_t)
1295iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1296 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1297{
1298 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
1299#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1300 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1301#endif
1302
1303 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1304 if (!a_fFlat || enmEffOpSize == IEMMODE_16BIT)
1305 {
1306 off = iemNativeRegFlushPendingWrites(pReNative, off);
1307#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1308 Assert(pReNative->Core.offPc == 0);
1309#endif
1310 }
1311
1312 /* Allocate a temporary PC register. */
1313 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1314
1315 /* Perform the addition. */
1316#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1317 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr + (int32_t)pReNative->Core.offPc);
1318#else
1319 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
1320#endif
1321
1322 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
1323 if (enmEffOpSize == IEMMODE_16BIT)
1324 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1325
1326 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
1327 if (!a_fFlat)
1328 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1329
1330 /* Commit it. */
1331#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1332 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, enmEffOpSize == IEMMODE_32BIT ? 32 : 16);
1333 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1334#endif
1335
1336 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1337#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1338 pReNative->Core.offPc = 0;
1339#endif
1340
1341 /* Free but don't flush the PC register. */
1342 iemNativeRegFreeTmp(pReNative, idxPcReg);
1343
1344 return off;
1345}
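/* Worked example for the 16-bit operand-size path above (illustrative values only):
   EIP=0x0000FFFE, cbInstr=3, offDisp=+4 gives 0x00010005, which the
   iemNativeEmitClear16UpGpr call truncates to 0x0005 before the CS limit check,
   reproducing the expected 16-bit IP wrap-around. */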
1346
1347
1348#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
1349 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1350 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1351
1352#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
1353 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1354 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1355 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1356
1357#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
1358 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1359 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1360
1361#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1362 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1363 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1364 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1365
1366#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
1367 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1368 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1369
1370#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1371 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1372 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1373 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1374
1375/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
1376DECL_INLINE_THROW(uint32_t)
1377iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1378 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
1379{
1380 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1381 off = iemNativeRegFlushPendingWrites(pReNative, off);
1382
1383#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1384 Assert(pReNative->Core.offPc == 0);
1385 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1386#endif
1387
1388 /* Allocate a temporary PC register. */
1389 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1390
1391 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
1392 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
1393 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1394 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1395#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1396 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, 16);
1397 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1398#endif
1399 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1400
1401 /* Free but don't flush the PC register. */
1402 iemNativeRegFreeTmp(pReNative, idxPcReg);
1403
1404 return off;
1405}
1406
1407
1408
1409/*********************************************************************************************************************************
1410* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
1411*********************************************************************************************************************************/
1412
1413/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
1414#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
1415 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1416
1417/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
1418#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
1419 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1420
1421/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
1422#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
1423 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1424
1425/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
1426 * clears flags. */
1427#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
1428 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
1429 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1430
1431/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
1432 * clears flags. */
1433#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
1434 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
1435 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1436
1437/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
1438 * clears flags. */
1439#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
1440 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
1441 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1442
1443#undef IEM_MC_SET_RIP_U16_AND_FINISH
1444
1445
1446/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
1447#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
1448 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1449
1450/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
1451#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
1452 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1453
1454/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
1455 * clears flags. */
1456#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
1457 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
1458 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1459
1460/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
1461 * and clears flags. */
1462#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
1463 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
1464 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1465
1466#undef IEM_MC_SET_RIP_U32_AND_FINISH
1467
1468
1469/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
1470#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
1471 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1472
1473/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
1474 * and clears flags. */
1475#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
1476 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
1477 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1478
1479#undef IEM_MC_SET_RIP_U64_AND_FINISH
1480
1481
1482/** Same as iemRegRipJumpU16AndFinishNoFlags,
1483 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
1484DECL_INLINE_THROW(uint32_t)
1485iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
1486 uint8_t idxInstr, uint8_t cbVar)
1487{
1488 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1489 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1490
1491 /* If we can't rule out a #GP(0) below, flush all dirty register except for
1492 PC which will be handled specially by the two workers below if they raise a GP. */
1493 bool const fMayRaiseGp0 = (f64Bit && cbVar > sizeof(uint32_t)) || (!f64Bit && !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1494 uint8_t const idxOldPcReg = fMayRaiseGp0
1495 ? iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc)
1496 : UINT8_MAX;
1497 if (fMayRaiseGp0)
1498 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1499
1500 /* Get a register with the new PC loaded from idxVarPc.
1501 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1502 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
1503
1504 /* Check that the target is within CS.LIM / is canonical (may #GP(0) + exit TB). */
1505 if (fMayRaiseGp0)
1506 {
1507 if (f64Bit)
1508 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1509 else
1510 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1511 }
1512
1513 /* Store the result. */
1514 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1515
1516#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1517 pReNative->Core.offPc = 0;
1518 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1519# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1520 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1521 pReNative->Core.fDebugPcInitialized = true;
1522 Log4(("uPcUpdatingDebug=rip off=%#x\n", off));
1523# endif
1524#endif
1525
1526 if (idxOldPcReg != UINT8_MAX)
1527 iemNativeRegFreeTmp(pReNative, idxOldPcReg);
1528 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1529 /** @todo implicitly free the variable? */
1530
1531 return off;
1532}
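/* Sketch of the #GP(0) strategy above (informal): when fMayRaiseGp0 is set we keep
   the pre-jump PC around in idxOldPcReg so the raise-#GP(0) path can restore
   CPUMCTX.rip to the instruction that faulted, while all other state was already
   flushed; if the canonical/limit check passes, the new value in idxPcReg is simply
   committed to cpum.GstCtx.rip. */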
1533
1534
1535
1536/*********************************************************************************************************************************
1537* Emitters for changing PC/RIP/EIP/IP with an indirect call (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters). *
1538*********************************************************************************************************************************/
1539
1540/** @todo These helpers belong naturally to the stack push API, but we already need them up here (we could of course move
1541 * this code below the stack emitters, but then it would no longer be close to the rest of the PC/RIP handling...). */
1542DECL_FORCE_INLINE_THROW(uint32_t)
1543iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1544{
1545 /* Use16BitSp: */
1546#ifdef RT_ARCH_AMD64
1547 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1548 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1549#else
1550 /* sub regeff, regrsp, #cbMem */
1551 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
1552 /* and regeff, regeff, #0xffff */
1553 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1554 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
1555 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0. */
1556 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
1557#endif
1558 return off;
1559}
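/* Illustrative example for the Use16BitSp helper above (made-up values): a push of
   cbMem=4 with SP=0x0002 yields 0x0002 - 4 = 0xFFFE after the 16-bit wrap, so
   idxRegEffSp ends up as 0x0000FFFE while only bits 15:0 of idxRegRsp are updated,
   leaving the upper RSP bits untouched just like a real 16-bit stack segment. */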
1560
1561
1562DECL_FORCE_INLINE(uint32_t)
1563iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1564{
1565 /* Use32BitSp: */
1566 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
1567 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1568 return off;
1569}
1570
1571
1572DECL_INLINE_THROW(uint32_t)
1573iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
1574 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
1575{
1576 /*
1577 * Assert sanity.
1578 */
1579#ifdef VBOX_STRICT
1580 if (RT_BYTE2(cBitsVarAndFlat) != 0)
1581 {
1582 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
1583 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
1584 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
1585 Assert( pfnFunction
1586 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1587 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
1588 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1589 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
1590 : UINT64_C(0xc000b000a0009000) ));
1591 }
1592 else
1593 Assert( pfnFunction
1594 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
1595 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
1596 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
1597 : UINT64_C(0xc000b000a0009000) ));
1598#endif
1599
1600#ifdef VBOX_STRICT
1601 /*
1602 * Check that the fExec flags we've got make sense.
1603 */
1604 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1605#endif
1606
1607 /*
1608 * To keep things simple we have to commit any pending writes first as we
1609 * may end up making calls.
1610 */
1611 /** @todo we could postpone this till we make the call and reload the
1612 * registers after returning from the call. Not sure if that's sensible or
1613 * not, though. */
1614 off = iemNativeRegFlushPendingWrites(pReNative, off);
1615
1616 /*
1617 * First we calculate the new RSP and the effective stack pointer value.
1618 * For 64-bit mode and flat 32-bit these two are the same.
1619 * (Code structure is very similar to that of PUSH)
1620 */
1621 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
1622 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
1623 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
1624 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
1625 ? cbMem : sizeof(uint16_t);
1626 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
1627 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1628 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1629 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1630 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1631 if (cBitsFlat != 0)
1632 {
1633 Assert(idxRegEffSp == idxRegRsp);
1634 Assert(cBitsFlat == 32 || cBitsFlat == 64);
1635 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1636 if (cBitsFlat == 64)
1637 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1638 else
1639 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1640 }
1641 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1642 {
1643 Assert(idxRegEffSp != idxRegRsp);
1644 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1645 kIemNativeGstRegUse_ReadOnly);
1646#ifdef RT_ARCH_AMD64
1647 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1648#else
1649 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1650#endif
1651 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1652 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1653 offFixupJumpToUseOtherBitSp = off;
1654 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1655 {
1656 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1657 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1658 }
1659 else
1660 {
1661 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1662 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1663 }
1664 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1665 }
1666 /* SpUpdateEnd: */
1667 uint32_t const offLabelSpUpdateEnd = off;
1668
1669 /*
1670 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1671 * we're skipping lookup).
1672 */
1673 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1674 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
1675 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1676 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1677 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1678 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1679 : UINT32_MAX;
1680 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1681
1682
1683 if (!TlbState.fSkip)
1684 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1685 else
1686 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1687
1688 /*
1689 * Use16BitSp:
1690 */
1691 if (cBitsFlat == 0)
1692 {
1693#ifdef RT_ARCH_AMD64
1694 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1695#else
1696 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1697#endif
1698 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1699 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1700 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1701 else
1702 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1703 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1704 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1705 }
1706
1707 /*
1708 * TlbMiss:
1709 *
1710 * Call helper to do the pushing.
1711 */
1712 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1713
1714#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1715 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1716#else
1717 RT_NOREF(idxInstr);
1718#endif
1719
1720 /* Save variables in volatile registers. */
1721 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1722 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1723 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1724 | (RT_BIT_32(idxRegPc));
1725 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1726
1727 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1728 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1729 {
1730 /* Swap them using ARG0 as temp register: */
1731 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1732 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1733 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1734 }
1735 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1736 {
1737 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1738 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1739
1740 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1741 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1742 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1743 }
1744 else
1745 {
1746 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1747 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1748
1749 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1750 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1751 }
1752
1753 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1754 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1755
1756 /* Done setting up parameters, make the call. */
1757 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1758
1759 /* Restore variables and guest shadow registers to volatile registers. */
1760 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1761 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1762
1763#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1764 if (!TlbState.fSkip)
1765 {
1766 /* end of TlbMiss - Jump to the done label. */
1767 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1768 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1769
1770 /*
1771 * TlbLookup:
1772 */
1773 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
1774 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1775
1776 /*
1777 * Emit code to do the actual storing / fetching.
1778 */
1779 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1780# ifdef IEM_WITH_TLB_STATISTICS
1781 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1782 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1783# endif
1784 switch (cbMemAccess)
1785 {
1786 case 2:
1787 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1788 break;
1789 case 4:
1790 if (!fIsIntelSeg)
1791 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1792 else
1793 {
1794 /* intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
1795 PUSH FS in real mode, so we have to try to emulate that here.
1796 We borrow the now unused idxReg1 from the TLB lookup code here. */
1797 uint8_t const idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
1798 kIemNativeGstReg_EFlags);
1799 if (idxRegEfl != UINT8_MAX)
1800 {
1801#ifdef RT_ARCH_AMD64
1802 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
1803 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1804 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1805#else
1806 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
1807 off, TlbState.idxReg1, idxRegEfl,
1808 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1809#endif
1810 iemNativeRegFreeTmp(pReNative, idxRegEfl);
1811 }
1812 else
1813 {
1814 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
1815 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
1816 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1817 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1818 }
1819 /* ASSUMES the upper half of idxRegPc is ZERO. */
1820 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
1821 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
1822 }
1823 break;
1824 case 8:
1825 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1826 break;
1827 default:
1828 AssertFailed();
1829 }
1830
1831 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1832 TlbState.freeRegsAndReleaseVars(pReNative);
1833
1834 /*
1835 * TlbDone:
1836 *
1837 * Commit the new RSP value.
1838 */
1839 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1840 }
1841#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1842
1843#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1844 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1845#endif
1846 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1847 if (idxRegEffSp != idxRegRsp)
1848 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1849
1850 return off;
1851}
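/* Informal note on the cBitsVarAndFlat parameter used above: it is packed with
   RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSegReg, 0), so e.g.
   RT_MAKE_U32_FROM_U8(64, 64, 0, 0) means "push a 64-bit value using a flat 64-bit
   stack", while a zero second byte selects the segmented path with the SS.ATTR.D
   check above. */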
1852
1853
1854/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1855#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1856 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1857
1858/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1859 * clears flags. */
1860#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1861 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1862 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1863
1864/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1865#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1866 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1867
1868/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1869 * clears flags. */
1870#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1871 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1872 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1873
1874#undef IEM_MC_IND_CALL_U16_AND_FINISH
1875
1876
1877/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1878#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1879 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1880
1881/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1882 * clears flags. */
1883#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1884 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1885 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1886
1887#undef IEM_MC_IND_CALL_U32_AND_FINISH
1888
1889
1890/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1891 * an extra parameter, for use in 64-bit code. */
1892#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1893 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1894
1895
1896/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1897 * an extra parameter, for use in 64-bit code and we need to check and clear
1898 * flags. */
1899#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1900 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1901 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1902
1903#undef IEM_MC_IND_CALL_U64_AND_FINISH
1904
1905/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1906 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1907DECL_INLINE_THROW(uint32_t)
1908iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1909 uint8_t idxInstr, uint8_t cbVar)
1910{
1911 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1912 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1913
1914 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1915 off = iemNativeRegFlushPendingWrites(pReNative, off);
1916
1917#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1918 Assert(pReNative->Core.offPc == 0);
1919 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1920#endif
1921
1922 /* Get a register with the new PC loaded from idxVarPc.
1923 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1924 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1925
1926 /* Check limit (may #GP(0) + exit TB). */
1927 if (!f64Bit)
1928/** @todo we can skip this test in FLAT 32-bit mode. */
1929 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1930 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1931 else if (cbVar > sizeof(uint32_t))
1932 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1933
1934#if 1
1935 /* Allocate a temporary PC register, we don't want it shadowed. */
1936 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1937 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1938#else
1939 /* Allocate a temporary PC register. */
1940 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1941 true /*fNoVolatileRegs*/);
1942#endif
1943
1944 /* Perform the addition and push the variable to the guest stack. */
1945 /** @todo Flat variants for PC32 variants. */
1946 switch (cbVar)
1947 {
1948 case sizeof(uint16_t):
1949 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1950 /* Truncate the result to 16-bit IP. */
1951 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1952 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1953 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1954 break;
1955 case sizeof(uint32_t):
1956 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1957 /** @todo In FLAT mode we can use the flat variant. */
1958 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1959 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1960 break;
1961 case sizeof(uint64_t):
1962 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1963 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1964 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1965 break;
1966 default:
1967 AssertFailed();
1968 }
1969
1970 /* RSP got changed, so do this again. */
1971 off = iemNativeRegFlushPendingWrites(pReNative, off);
1972
1973 /* Store the result. */
1974 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1975#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1976 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1977 pReNative->Core.fDebugPcInitialized = true;
1978 Log4(("uPcUpdatingDebug=rip/indirect-call off=%#x\n", off));
1979#endif
1980
1981#if 1
1982 /* Need to transfer the shadow information to the new RIP register. */
1983 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1984#else
1985 /* Sync the new PC. */
1986 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1987#endif
1988 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1989 iemNativeRegFreeTmp(pReNative, idxPcReg);
1990 /** @todo implicitly free the variable? */
1991
1992 return off;
1993}
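/* Rough rationale for the shadow transfer above (informal): idxPcReg is only a
   calculation copy, so the limit/canonical check or the stack push can still raise
   an exception without the guest RIP shadow having been dirtied; only after the new
   RIP has been stored do we mark idxNewPcReg as the PC shadow via
   iemNativeRegClearAndMarkAsGstRegShadow. */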
1994
1995
1996/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1997 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1998#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1999 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
2000
2001/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2002 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
2003 * flags. */
2004#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
2005 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
2006 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2007
2008/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2009 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2010#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
2011 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
2012
2013/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2014 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2015 * flags. */
2016#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
2017 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
2018 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2019
2020/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2021 * an extra parameter, for use in 64-bit code. */
2022#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
2023 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
2024
2025/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2026 * an extra parameter, for use in 64-bit code and we need to check and clear
2027 * flags. */
2028#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
2029 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
2030 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2031
2032#undef IEM_MC_REL_CALL_S16_AND_FINISH
2033
2034/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2035 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2036DECL_INLINE_THROW(uint32_t)
2037iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
2038 uint8_t idxInstr)
2039{
2040 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2041 off = iemNativeRegFlushPendingWrites(pReNative, off);
2042
2043#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2044 Assert(pReNative->Core.offPc == 0);
2045 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2046#endif
2047
2048 /* Allocate a temporary PC register. */
2049 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2050 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
2051 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
2052
2053 /* Calculate the new RIP. */
2054 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2055 /* Truncate the result to 16-bit IP. */
2056 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
2057 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2058 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2059
2060 /* Truncate the result to 16-bit IP. */
2061 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
2062
2063 /* Check limit (may #GP(0) + exit TB). */
2064 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2065
2066 /* Perform the addition and push the variable to the guest stack. */
2067 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
2068 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
2069
2070 /* RSP got changed, so flush again. */
2071 off = iemNativeRegFlushPendingWrites(pReNative, off);
2072
2073 /* Store the result. */
2074 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2075#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2076 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2077 pReNative->Core.fDebugPcInitialized = true;
2078 Log4(("uPcUpdatingDebug=rip/rel-call-16 off=%#x offDisp=%d\n", off, offDisp));
2079#endif
2080
2081 /* Need to transfer the shadow information to the new RIP register. */
2082 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2083 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2084 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2085
2086 return off;
2087}
2088
2089
2090/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2091 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2092#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
2093 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
2094
2095/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2096 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2097 * flags. */
2098#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
2099 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
2100 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2101
2102#undef IEM_MC_REL_CALL_S32_AND_FINISH
2103
2104/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2105 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2106DECL_INLINE_THROW(uint32_t)
2107iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
2108 uint8_t idxInstr)
2109{
2110 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2111 off = iemNativeRegFlushPendingWrites(pReNative, off);
2112
2113#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2114 Assert(pReNative->Core.offPc == 0);
2115 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2116#endif
2117
2118 /* Allocate a temporary PC register. */
2119 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2120 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2121 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
2122
2123 /* Update the EIP to get the return address. */
2124 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2125
2126 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2127 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2128 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2129 /** @todo we can skip this test in FLAT 32-bit mode. */
2130 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2131
2132 /* Push the return address to the guest stack. */
2133 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
2134 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
2135 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
2136
2137 /* RSP got changed, so do this again. */
2138 off = iemNativeRegFlushPendingWrites(pReNative, off);
2139
2140 /* Store the result. */
2141 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2142#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2143 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2144 pReNative->Core.fDebugPcInitialized = true;
2145 Log4(("uPcUpdatingDebug=eip/rel-call-32 off=%#x offDisp=%d\n", off, offDisp));
2146#endif
2147
2148 /* Need to transfer the shadow information to the new RIP register. */
2149 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2150 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2151 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2152
2153 return off;
2154}
2155
2156
2157/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2158 * an extra parameter, for use in 64-bit code. */
2159#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
2160 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
2161
2162/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2163 * an extra parameter, for use in 64-bit code and we need to check and clear
2164 * flags. */
2165#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
2166 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
2167 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2168
2169#undef IEM_MC_REL_CALL_S64_AND_FINISH
2170
2171/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2172 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2173DECL_INLINE_THROW(uint32_t)
2174iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
2175 uint8_t idxInstr)
2176{
2177 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2178 off = iemNativeRegFlushPendingWrites(pReNative, off);
2179
2180#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2181 Assert(pReNative->Core.offPc == 0);
2182 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2183#endif
2184
2185 /* Allocate a temporary PC register. */
2186 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2187 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2188 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
2189
2190 /* Update the RIP to get the return address. */
2191 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
2192
2193 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2194 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
2195 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
2196 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2197
2198 /* Push the return address to the guest stack. */
2199 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
2200 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
2201
2202 /* RSP got changed, so do this again. */
2203 off = iemNativeRegFlushPendingWrites(pReNative, off);
2204
2205 /* Store the result. */
2206 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2207#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2208 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2209 pReNative->Core.fDebugPcInitialized = true;
2210 Log4(("uPcUpdatingDebug=rip/rel-call-64 off=%#x offDisp=%ld\n", off, offDisp));
2211#endif
2212
2213 /* Need to transfer the shadow information to the new RIP register. */
2214 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2215 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2216 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2217
2218 return off;
2219}
2220
2221
2222/*********************************************************************************************************************************
2223* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters). *
2224*********************************************************************************************************************************/
2225
2226DECL_FORCE_INLINE_THROW(uint32_t)
2227iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2228 uint16_t cbPopAdd, uint8_t idxRegTmp)
2229{
2230 /* Use16BitSp: */
2231#ifdef RT_ARCH_AMD64
2232 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2233 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
2234 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
2235 RT_NOREF(idxRegTmp);
2236
2237#elif defined(RT_ARCH_ARM64)
2238 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
2239 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
2240 /* add tmp, regrsp, #(cbMem + cbPopAdd) */
2241 uint16_t const cbCombined = cbMem + cbPopAdd;
2242 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
2243 if (cbCombined >= RT_BIT_32(12))
2244 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
2245 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2246 /* and tmp, tmp, #0xffff */
2247 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2248 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
2249 /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
2250 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
2251
2252#else
2253# error "Port me"
2254#endif
2255 return off;
2256}
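/* Worked example for the 16-bit SP variant above (illustrative values): SP=0xFFFC
   with cbMem=2 (o16 near return) and cbPopAdd=4 (retn 4) gives 0xFFFC + 6 = 0x10002,
   masked to 0x0002, so idxRegEffSp keeps the old 0xFFFC for fetching the return
   address while RSP bits 15:0 become 0x0002. */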
2257
2258
2259DECL_FORCE_INLINE_THROW(uint32_t)
2260iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2261 uint16_t cbPopAdd)
2262{
2263 /* Use32BitSp: */
2264 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2265 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
2266 return off;
2267}
2268
2269
2270/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
2271#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
2272 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
2273
2274/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
2275#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2276 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
2277
2278/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
2279#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2280 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
2281
2282/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
2283 * clears flags. */
2284#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
2285 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
2286 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2287
2288/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
2289 * clears flags. */
2290#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2291 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
2292 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2293
2294/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
2295 * clears flags. */
2296#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2297 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
2298 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2299
2300/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
2301DECL_INLINE_THROW(uint32_t)
2302iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
2303 IEMMODE enmEffOpSize, uint8_t idxInstr)
2304{
2305 RT_NOREF(cbInstr);
2306
2307#ifdef VBOX_STRICT
2308 /*
2309 * Check that the fExec flags we've got make sense.
2310 */
2311 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
2312#endif
2313
2314 /*
2315 * To keep things simple we have to commit any pending writes first as we
2316 * may end up making calls.
2317 */
2318 off = iemNativeRegFlushPendingWrites(pReNative, off);
2319
2320 /*
2321 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
2322 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
2323 * directly as the effective stack pointer.
2324 * (Code structure is very similar to that of PUSH)
2325 *
2326 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
2327 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
2328 * aren't commonly used (or useful) and thus not in need of optimizing.
2329 *
2330 * Note! For non-flat modes the guest RSP is not allocated for update but rather for calculation,
2331 * as the shadowed register would otherwise remain modified even if the return address throws
2332 * a \#GP(0) due to being outside the CS limit, causing a wrong stack pointer value in the guest (see
2333 * the near return testcase in bs3-cpu-basic-2). If no exception is thrown, the shadowing is transferred
2334 * to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
2335 */
2336 uint8_t const cbMem = enmEffOpSize == IEMMODE_64BIT
2337 ? sizeof(uint64_t)
2338 : enmEffOpSize == IEMMODE_32BIT
2339 ? sizeof(uint32_t)
2340 : sizeof(uint16_t);
2341 bool const fFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
2342 uintptr_t const pfnFunction = fFlat
2343 ? enmEffOpSize == IEMMODE_64BIT
2344 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
2345 : (uintptr_t)iemNativeHlpStackFlatFetchU32
2346 : enmEffOpSize == IEMMODE_32BIT
2347 ? (uintptr_t)iemNativeHlpStackFetchU32
2348 : (uintptr_t)iemNativeHlpStackFetchU16;
2349 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
2350 fFlat ? kIemNativeGstRegUse_ForUpdate
2351 : kIemNativeGstRegUse_Calculation,
2352 true /*fNoVolatileRegs*/);
2353 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
2354 /** @todo can do a better job picking the register here. For cbMem >= 4 this
2355 * will be the resulting register value. */
2356 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
2357
2358 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
2359 if (fFlat)
2360 Assert(idxRegEffSp == idxRegRsp);
2361 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
2362 {
2363 Assert(idxRegEffSp != idxRegRsp);
2364 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
2365 kIemNativeGstRegUse_ReadOnly);
2366#ifdef RT_ARCH_AMD64
2367 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2368#else
2369 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2370#endif
2371 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
2372 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
2373 offFixupJumpToUseOtherBitSp = off;
2374 if (enmEffOpSize == IEMMODE_32BIT)
2375 {
2376 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
2377 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
2378 }
2379 else
2380 {
2381 Assert(enmEffOpSize == IEMMODE_16BIT);
2382 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
2383 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
2384 idxRegMemResult);
2385 }
2386 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2387 }
2388 /* SpUpdateEnd: */
2389 uint32_t const offLabelSpUpdateEnd = off;
2390
2391 /*
2392 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
2393 * we're skipping lookup).
2394 */
2395 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
2396 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
2397 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
2398 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
2399 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
2400 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
2401 : UINT32_MAX;
2402
2403 if (!TlbState.fSkip)
2404 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
2405 else
2406 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
2407
2408 /*
2409 * Use16BitSp:
2410 */
2411 if (!fFlat)
2412 {
2413#ifdef RT_ARCH_AMD64
2414 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2415#else
2416 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2417#endif
2418 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
2419 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
2420 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
2421 idxRegMemResult);
2422 else
2423 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
2424 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
2425 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2426 }
2427
2428 /*
2429 * TlbMiss:
2430 *
2431 * Call helper to do the popping.
2432 */
2433 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
2434
2435#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2436 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2437#else
2438 RT_NOREF(idxInstr);
2439#endif
2440
2441 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
2442 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
2443 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
2444 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
2445
2446
2447 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
2448 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
2449 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
2450
2451 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
2452 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2453
2454 /* Done setting up parameters, make the call. */
2455 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
2456
2457 /* Move the return register content to idxRegMemResult. */
2458 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
2459 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
2460
2461 /* Restore variables and guest shadow registers to volatile registers. */
2462 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
2463 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
2464
2465#ifdef IEMNATIVE_WITH_TLB_LOOKUP
2466 if (!TlbState.fSkip)
2467 {
2468 /* end of TlbMiss - Jump to the done label. */
2469 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
2470 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
2471
2472 /*
2473 * TlbLookup:
2474 */
2475 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
2476 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
2477
2478 /*
2479 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
2480 */
2481 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2482# ifdef IEM_WITH_TLB_STATISTICS
2483 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
2484 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
2485# endif
2486 switch (cbMem)
2487 {
2488 case 2:
2489 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2490 break;
2491 case 4:
2492 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2493 break;
2494 case 8:
2495 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2496 break;
2497 default:
2498 AssertFailed();
2499 }
2500
2501 TlbState.freeRegsAndReleaseVars(pReNative);
2502
2503 /*
2504 * TlbDone:
2505 *
2506 * Set the new RSP value (FLAT accesses need to calculate it first) and
2507 * commit the popped register value.
2508 */
2509 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
2510 }
2511#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
2512
2513 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
2514 if (!f64Bit)
2515/** @todo we can skip this test in FLAT 32-bit mode. */
2516 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2517 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2518 else if (enmEffOpSize == IEMMODE_64BIT)
2519 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2520
2521 /* Complete RSP calculation for FLAT mode. */
2522 if (idxRegEffSp == idxRegRsp)
2523 {
2524 if (enmEffOpSize == IEMMODE_64BIT)
2525 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
2526 else
2527 {
2528 Assert(enmEffOpSize == IEMMODE_32BIT);
2529 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
2530 }
2531 }
2532
2533 /* Commit the result and clear any current guest shadows for RIP. */
2534 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
2535 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2536 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
2537#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2538 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2539 pReNative->Core.fDebugPcInitialized = true;
2540 Log4(("uPcUpdatingDebug=rip/ret off=%#x\n", off));
2541#endif
2542
2543 /* Need to transfer the shadowing information to the host register containing the updated value now. */
2544 if (!fFlat)
2545 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
2546
2547 iemNativeRegFreeTmp(pReNative, idxRegRsp);
2548 if (idxRegEffSp != idxRegRsp)
2549 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
2550 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
2551 return off;
2552}
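
/*
 * For reference, a rough plain-C sketch of the guest-visible operation the
 * emitter above reproduces for the simple flat 64-bit case.  The helper name
 * is made up for illustration and the snippet is not part of the build;
 * segmentation, TLB and exception delivery details are omitted.
 */
#if 0 /* illustrative sketch only */
static uint64_t SketchRetnFlat64(uint64_t *puRsp, uint16_t cbPop, uint64_t const *pStackMem)
{
    uint64_t const uRetRip = pStackMem[0];                      /* fetch return address at [RSP] */
    Assert(((uRetRip + UINT64_C(0x800000000000)) >> 48) == 0);  /* must be canonical, else #GP(0) */
    *puRsp += sizeof(uint64_t) + cbPop;                         /* RSP += 8 + imm16 operand */
    return uRetRip;                                             /* committed as the new RIP */
}
#endif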
2553
2554
2555/*********************************************************************************************************************************
2556* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
2557*********************************************************************************************************************************/
2558
2559#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
2560 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2561
2562/**
2563 * Emits code to check if a \#NM exception should be raised.
2564 *
2565 * @returns New code buffer offset, UINT32_MAX on failure.
2566 * @param pReNative The native recompile state.
2567 * @param off The code buffer offset.
2568 * @param idxInstr The current instruction.
2569 */
2570DECL_INLINE_THROW(uint32_t)
2571iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2572{
2573#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2574 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
2575
2576 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
2577 {
2578#endif
2579 /*
2580 * Make sure we don't have any outstanding guest register writes as we may
2581 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2582 */
2583 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2584 off = iemNativeRegFlushPendingWrites(pReNative, off);
2585
2586#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2587 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2588#else
2589 RT_NOREF(idxInstr);
2590#endif
2591
2592 /* Allocate a temporary CR0 register. */
2593 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2594 kIemNativeGstRegUse_ReadOnly);
2595
2596 /*
2597 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
2598 * return raisexcpt();
2599 */
2600 /* Test and jump. */
2601 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS,
2602 kIemNativeLabelType_RaiseNm);
2603
2604 /* Free but don't flush the CR0 register. */
2605 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2606
2607#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2608 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
2609 }
2610 else
2611 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
2612#endif
2613
2614 return off;
2615}
2616
2617
2618#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
2619 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2620
2621/**
2622 * Emits code to check if a \#NM exception should be raised for WAIT/FWAIT.
2623 *
2624 * @returns New code buffer offset, UINT32_MAX on failure.
2625 * @param pReNative The native recompile state.
2626 * @param off The code buffer offset.
2627 * @param idxInstr The current instruction.
2628 */
2629DECL_INLINE_THROW(uint32_t)
2630iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2631{
2632#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2633 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2634
2635 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2636 {
2637#endif
2638 /*
2639 * Make sure we don't have any outstanding guest register writes as we may
2640 * raise an #NM and all guest register must be up to date in CPUMCTX.
2641 */
2642 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2643 off = iemNativeRegFlushPendingWrites(pReNative, off);
2644
2645#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2646 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2647#else
2648 RT_NOREF(idxInstr);
2649#endif
2650
2651 /* Allocate a temporary CR0 register. */
2652 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2653 kIemNativeGstRegUse_Calculation);
2654
2655 /*
2656 * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
2657 * return raisexcpt();
2658 */
2659 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2660 /* Test and jump. */
2661 off = iemNativeEmitTestIfGpr32EqualsImmAndTbExit(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS,
2662 kIemNativeLabelType_RaiseNm);
2663
2664 /* Free the CR0 register. */
2665 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2666
2667#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2668 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2669 }
2670 else
2671 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2672#endif
2673
2674 return off;
2675}
2676
2677
2678#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2679 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2680
2681/**
2682 * Emits code to check if a \#MF exception should be raised.
2683 *
2684 * @returns New code buffer offset, UINT32_MAX on failure.
2685 * @param pReNative The native recompile state.
2686 * @param off The code buffer offset.
2687 * @param idxInstr The current instruction.
2688 */
2689DECL_INLINE_THROW(uint32_t)
2690iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2691{
2692 /*
2693 * Make sure we don't have any outstanding guest register writes as we may
2694 * raise an #MF and all guest registers must be up to date in CPUMCTX.
2695 */
2696 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2697 off = iemNativeRegFlushPendingWrites(pReNative, off);
2698
2699#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2700 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2701#else
2702 RT_NOREF(idxInstr);
2703#endif
2704
2705 /* Allocate a temporary FSW register. */
2706 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2707 kIemNativeGstRegUse_ReadOnly);
2708
2709 /*
2710 * if ((FSW & X86_FSW_ES) != 0)
2711 * return raisexcpt();
2712 */
2713 /* Test and jump. */
2714 off = iemNativeEmitTestBitInGprAndTbExitIfSet(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT, kIemNativeLabelType_RaiseMf);
2715
2716 /* Free but don't flush the FSW register. */
2717 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2718
2719 return off;
2720}
2721
2722
2723#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2724 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2725
2726/**
2727 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2728 *
2729 * @returns New code buffer offset, UINT32_MAX on failure.
2730 * @param pReNative The native recompile state.
2731 * @param off The code buffer offset.
2732 * @param idxInstr The current instruction.
2733 */
2734DECL_INLINE_THROW(uint32_t)
2735iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2736{
2737#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2738 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2739
2740 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2741 {
2742#endif
2743 /*
2744 * Make sure we don't have any outstanding guest register writes as we may
2745 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2746 */
2747 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2748 off = iemNativeRegFlushPendingWrites(pReNative, off);
2749
2750#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2751 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2752#else
2753 RT_NOREF(idxInstr);
2754#endif
2755
2756 /* Allocate a temporary CR0 and CR4 register. */
2757 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2758 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2759 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2760
2761 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2762#ifdef RT_ARCH_AMD64
2763 /*
2764 * We do a modified test here:
2765 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
2766 * else { goto RaiseSseRelated; }
2767 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2768 * all targets except the 386, and since the 386 doesn't support SSE
2769 * anyway, this should be a safe assumption.
2770 */
2771 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
2772 //pCodeBuf[off++] = 0xcc;
2773 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2774 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2775 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2776 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2777 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2778 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseSseRelated, kIemNativeInstrCond_ne);
2779
2780#elif defined(RT_ARCH_ARM64)
2781 /*
2782 * We do a modified test here:
2783 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2784 * else { goto RaiseSseRelated; }
2785 */
2786 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
2787 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2788 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2789 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2790 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2791 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2792 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2793 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2794 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2795 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2796 kIemNativeLabelType_RaiseSseRelated);
2797
2798#else
2799# error "Port me!"
2800#endif
2801
2802 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2803 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2804 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2805 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2806
2807#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2808 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2809 }
2810 else
2811 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2812#endif
2813
2814 return off;
2815}
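
/*
 * The folded CR0/CR4 test emitted above is equivalent to the straightforward
 * check sketched below (hypothetical helper names, plain C, not compiled).
 * The folded form relies on the same CR0[bit 9]==0 assumption noted above.
 */
#if 0 /* illustrative sketch only */
static bool SketchNeedsSseRelatedXcpt(uint32_t cr0, uint32_t cr4)
{
    return (cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0       /* #UD (EM) or #NM (TS) */
        || (cr4 & X86_CR4_OSFXSR) == 0;                 /* #UD */
}

static bool SketchNeedsSseRelatedXcptFolded(uint32_t cr0, uint32_t cr4)
{
    uint32_t uTmp = (cr4 & X86_CR4_OSFXSR) | cr0;       /* merge the interesting bits */
    uTmp &= X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR;   /* mask out everything else */
    return (uTmp ^ X86_CR4_OSFXSR) != 0;                /* zero only if OSFXSR=1, EM=0, TS=0 */
}
#endif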
2816
2817
2818#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2819 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2820
2821/**
2822 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2823 *
2824 * @returns New code buffer offset, UINT32_MAX on failure.
2825 * @param pReNative The native recompile state.
2826 * @param off The code buffer offset.
2827 * @param idxInstr The current instruction.
2828 */
2829DECL_INLINE_THROW(uint32_t)
2830iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2831{
2832#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2833 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2834
2835 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2836 {
2837#endif
2838 /*
2839 * Make sure we don't have any outstanding guest register writes as we may
2840 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2841 */
2842 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2843 off = iemNativeRegFlushPendingWrites(pReNative, off);
2844
2845#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2846 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2847#else
2848 RT_NOREF(idxInstr);
2849#endif
2850
2851 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2852 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2853 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2854 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2855 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2856
2857 /*
2858 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2859 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2860 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2861 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2862 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2863 * { likely }
2864 * else { goto RaiseAvxRelated; }
2865 */
2866#ifdef RT_ARCH_AMD64
2867 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2868 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
2869 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2870 ^ 0x1a) ) { likely }
2871 else { goto RaiseAvxRelated; } */
2872 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
2873 //pCodeBuf[off++] = 0xcc;
2874 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2875 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2876 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2877 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2878 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2879 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2880 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2881 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2882 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2883 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2884 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseAvxRelated, kIemNativeInstrCond_ne);
2885
2886#elif defined(RT_ARCH_ARM64)
2887 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
2888 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2889 else { goto RaiseAvxRelated; } */
2890 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
2891 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2892 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2893 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2894 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2895 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2896 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2897 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2898 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2899 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2900 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2901 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2902 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2903 kIemNativeLabelType_RaiseAvxRelated);
2904
2905#else
2906# error "Port me!"
2907#endif
2908
2909 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2910 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2911 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2912 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2913#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2914 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2915 }
2916 else
2917 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2918#endif
2919
2920 return off;
2921}
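
/*
 * Straightforward form of the AVX-related exception condition checked by the
 * emitter above (hypothetical helper name, plain C, not compiled):
 */
#if 0 /* illustrative sketch only */
static bool SketchNeedsAvxRelatedXcpt(uint64_t uXcr0, uint32_t cr4, uint32_t cr0)
{
    return (uXcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE) /* #UD */
        || (cr4 & X86_CR4_OSXSAVE) == 0                                         /* #UD */
        || (cr0 & X86_CR0_TS) != 0;                                             /* #NM */
}
#endif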
2922
2923
2924#define IEM_MC_RAISE_DIVIDE_ERROR() \
2925 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
2926
2927/**
2928 * Emits code to raise a \#DE.
2929 *
2930 * @returns New code buffer offset, UINT32_MAX on failure.
2931 * @param pReNative The native recompile state.
2932 * @param off The code buffer offset.
2933 * @param idxInstr The current instruction.
2934 */
2935DECL_INLINE_THROW(uint32_t)
2936iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2937{
2938 /*
2939 * Make sure we don't have any outstanding guest register writes, as we are about to
2940 * raise a \#DE and all guest registers must be up to date in CPUMCTX. */
2941 off = iemNativeRegFlushPendingWrites(pReNative, off);
2942
2943#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2944 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2945#else
2946 RT_NOREF(idxInstr);
2947#endif
2948
2949 /* raise \#DE exception unconditionally. */
2950 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseDe);
2951}
2952
2953
2954#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2955 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2956
2957/**
2958 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2959 *
2960 * @returns New code buffer offset, UINT32_MAX on failure.
2961 * @param pReNative The native recompile state.
2962 * @param off The code buffer offset.
2963 * @param idxInstr The current instruction.
2964 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2965 * @param cbAlign The alignment in bytes to check against.
2966 */
2967DECL_INLINE_THROW(uint32_t)
2968iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2969 uint8_t idxVarEffAddr, uint8_t cbAlign)
2970{
2971 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2972 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2973
2974 /*
2975 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2976 */
2977 off = iemNativeRegFlushPendingWrites(pReNative, off);
2978
2979#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2980 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2981#else
2982 RT_NOREF(idxInstr);
2983#endif
2984
2985 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2986
2987 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxVarReg, cbAlign - 1,
2988 kIemNativeLabelType_RaiseGp0);
2989
2990 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2991 return off;
2992}
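
/*
 * The emitted alignment check boils down to a simple mask test, assuming
 * cbAlign is a power of two (hypothetical helper name, not compiled):
 */
#if 0 /* illustrative sketch only */
static bool SketchIsEffAddrMisaligned(RTGCPTR GCPtrEff, uint8_t cbAlign)
{
    return (GCPtrEff & (cbAlign - 1U)) != 0; /* any low bit set -> #GP(0) */
}
#endif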
2993
2994
2995/*********************************************************************************************************************************
2996* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2997*********************************************************************************************************************************/
2998
2999/**
3000 * Pushes an IEM_MC_IF_XXX onto the condition stack.
3001 *
3002 * @returns Pointer to the condition stack entry.
3003 * @throws VERR_IEM_COND_TOO_DEEPLY_NESTED if the conditions are nested too deeply.
3004 */
3005DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
3006{
3007 uint32_t const idxStack = pReNative->cCondDepth;
3008 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
3009
3010 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
3011 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
3012
3013 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
3014 pEntry->fInElse = false;
3015 pEntry->fIfExitTb = false;
3016 pEntry->fElseExitTb = false;
3017 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
3018 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
3019
3020 return pEntry;
3021}
3022
3023
3024/**
3025 * Start of the if-block, snapshotting the register and variable state.
3026 */
3027DECL_INLINE_THROW(void)
3028iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
3029{
3030 Assert(offIfBlock != UINT32_MAX);
3031 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3032 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3033 Assert(!pEntry->fInElse);
3034
3035 /* Define the start of the IF block if requested or for disassembly purposes. */
3036 if (idxLabelIf != UINT32_MAX)
3037 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
3038#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3039 else
3040 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
3041#else
3042 RT_NOREF(offIfBlock);
3043#endif
3044
3045 /* Copy the initial state so we can restore it in the 'else' block. */
3046 pEntry->InitialState = pReNative->Core;
3047}
3048
3049
3050#define IEM_MC_ELSE() } while (0); \
3051 off = iemNativeEmitElse(pReNative, off); \
3052 do {
3053
3054/** Emits code related to IEM_MC_ELSE. */
3055DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3056{
3057 /* Check sanity and get the conditional stack entry. */
3058 Assert(off != UINT32_MAX);
3059 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3060 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3061 Assert(!pEntry->fInElse);
3062
3063 /* We can skip the dirty register flushing and the jump to the endif label if
3064 the branch already jumped to a TB exit. */
3065 if (!pEntry->fIfExitTb)
3066 {
3067#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) && 0
3068 /* Writeback any dirty shadow registers. */
3069 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
3070 * in one of the branches and leave guest registers already dirty before the start of the if
3071 * block alone. */
3072 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3073#endif
3074
3075 /* Jump to the endif. */
3076 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
3077 }
3078# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3079 else
3080 Assert(pReNative->Core.offPc == 0);
3081# endif
3082
3083 /* Define the else label and enter the else part of the condition. */
3084 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3085 pEntry->fInElse = true;
3086
3087 /* Snapshot the core state so we can do a merge at the endif and restore
3088 the snapshot we took at the start of the if-block. */
3089 pEntry->IfFinalState = pReNative->Core;
3090 pReNative->Core = pEntry->InitialState;
3091
3092 return off;
3093}
3094
3095
3096#define IEM_MC_ENDIF() } while (0); \
3097 off = iemNativeEmitEndIf(pReNative, off)
3098
3099/** Emits code related to IEM_MC_ENDIF. */
3100DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3101{
3102 /* Check sanity and get the conditional stack entry. */
3103 Assert(off != UINT32_MAX);
3104 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3105 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3106
3107#if defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) && 0
3108 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3109#endif
3110
3111 /*
3112 * If either of the branches exited the TB, we can take the state from the
3113 * other branch and skip all the merging headache.
3114 */
3115 bool fDefinedLabels = false;
3116 if (pEntry->fElseExitTb || pEntry->fIfExitTb)
3117 {
3118#ifdef VBOX_STRICT
3119 Assert(pReNative->cCondDepth == 1); /* Assuming this only happens in simple conditional structures. */
3120 Assert(pEntry->fElseExitTb != pEntry->fIfExitTb); /* Assuming we don't have any code where both branches exit. */
3121 PCIEMNATIVECORESTATE const pExitCoreState = pEntry->fIfExitTb && pEntry->fInElse
3122 ? &pEntry->IfFinalState : &pReNative->Core;
3123# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3124 Assert(pExitCoreState->bmGstRegShadowDirty == 0);
3125# endif
3126# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3127 Assert(pExitCoreState->offPc == 0);
3128# endif
3129 RT_NOREF(pExitCoreState);
3130#endif
3131
3132 if (!pEntry->fIfExitTb)
3133 {
3134 Assert(pEntry->fInElse);
3135 pReNative->Core = pEntry->IfFinalState;
3136 }
3137 }
3138 else
3139 {
3140 /*
3141 * Now we have to find common ground with the core state at the end of
3142 * the other branch. Use the smallest common denominator and just drop anything
3143 * that isn't the same in both states.
3144 */
3145 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
3146 * which is why we're doing this at the end of the else-block.
3147 * But we'd need more info about the future for that to be worth the effort. */
3148 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
3149#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3150 AssertMsgStmt(pReNative->Core.offPc == pOther->offPc,
3151 ("Core.offPc=%#RX64 pOther->offPc=%#RX64\n", pReNative->Core.offPc, pOther->offPc),
3152 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3153#endif
3154
3155 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
3156 {
3157#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3158 /*
3159 * If the two branches differ in dirty shadow registers, we flush the
3160 * registers that are only dirty in the current branch here and emit tail
3161 * code further down for the ones that are only dirty in the other branch.
3162 */
3163 uint64_t const fGstRegDirtyOther = pOther->bmGstRegShadowDirty;
3164 uint64_t const fGstRegDirtyThis = pReNative->Core.bmGstRegShadowDirty;
3165 uint64_t const fGstRegDirtyDiff = fGstRegDirtyOther ^ fGstRegDirtyThis;
3166 uint64_t const fGstRegDirtyHead = fGstRegDirtyThis & fGstRegDirtyDiff;
3167 uint64_t fGstRegDirtyTail = fGstRegDirtyOther & fGstRegDirtyDiff;
3168 if (!fGstRegDirtyDiff)
3169 { /* likely */ }
3170 else
3171 {
3172 //uint64_t const fGstRegDirtyHead = pReNative->Core.bmGstRegShadowDirty & fGstRegDirtyDiff;
3173 if (fGstRegDirtyHead)
3174 {
3175 Log12(("iemNativeEmitEndIf: flushing dirty guest registers in current branch: %RX64\n", fGstRegDirtyHead));
3176 off = iemNativeRegFlushDirtyGuest(pReNative, off, fGstRegDirtyHead);
3177 }
3178 }
3179#endif
3180
3181 /*
3182 * Shadowed guest registers.
3183 *
3184 * We drop any shadows where the two states disagree about where
3185 * things are kept. We may end up flushing more dirty registers
3186 * here, if the two branches keep things in different registers.
3187 */
3188 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
3189 if (fGstRegs)
3190 {
3191 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
3192 do
3193 {
3194 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3195 fGstRegs &= ~RT_BIT_64(idxGstReg);
3196
3197 uint8_t const idxCurHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3198 uint8_t const idxOtherHstReg = pOther->aidxGstRegShadows[idxGstReg];
3199 if ( idxCurHstReg != idxOtherHstReg
3200 || !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg)))
3201 {
3202#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3203 Log12(("iemNativeEmitEndIf: dropping gst %s (%d) from hst %s (other %d/%#RX64)\n",
3204 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3205 idxOtherHstReg, pOther->bmGstRegShadows));
3206#else
3207 Log12(("iemNativeEmitEndIf: dropping %s gst %s (%d) from hst %s (other %d/%#RX64/%s)\n",
3208 pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "_dirty_" : "clean",
3209 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3210 idxOtherHstReg, pOther->bmGstRegShadows,
3211 pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "dirty" : "clean"));
3212 if (pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3213 fGstRegDirtyTail |= RT_BIT_64(idxGstReg);
3214 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3215 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3216#endif
3217 iemNativeRegClearGstRegShadowingOne(pReNative, idxCurHstReg, (IEMNATIVEGSTREG)idxGstReg, off);
3218 }
3219 } while (fGstRegs);
3220 }
3221 else
3222 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
3223
3224#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3225 /*
3226 * Generate jumpy code for flushing dirty registers from the other
3227 * branch that aren't dirty in the current one.
3228 */
3229 if (!fGstRegDirtyTail)
3230 { /* likely */ }
3231 else
3232 {
3233 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEndIfOtherBranchDirty);
3234 Log12(("iemNativeEmitEndIf: Dirty register only in the other branch: %#RX64 - BAD!\n", fGstRegDirtyTail));
3235
3236 /* First the current branch has to jump over the dirty flushing from the other branch. */
3237 uint32_t const offFixup1 = off;
3238 off = iemNativeEmitJmpToFixed(pReNative, off, off + 10);
3239
3240 /* Put the endif and maybe else label here so the other branch ends up here. */
3241 if (!pEntry->fInElse)
3242 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3243 else
3244 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3245 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3246 fDefinedLabels = true;
3247
3248 /* Flush the dirty guest registers from the other branch. */
3249 while (fGstRegDirtyTail)
3250 {
3251 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegDirtyTail) - 1;
3252 fGstRegDirtyTail &= ~RT_BIT_64(idxGstReg);
3253 Log12(("iemNativeEmitEndIf: tail flushing %s (%d) from other branch %d (cur %d/%#RX64)\n",
3254 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, pOther->aidxGstRegShadows[idxGstReg],
3255 pReNative->Core.aidxGstRegShadows[idxGstReg], pReNative->Core.bmGstRegShadows));
3256
3257 off = iemNativeRegFlushPendingWriteEx(pReNative, off, (PIEMNATIVECORESTATE)pOther, (IEMNATIVEGSTREG)idxGstReg);
3258
3259 /* Mismatching shadowing should've been dropped in the previous step already. */
3260 Assert( !(pReNative->Core.bmGstRegShadows & RT_BIT_64(idxGstReg))
3261 || pReNative->Core.aidxGstRegShadows[idxGstReg] == pOther->aidxGstRegShadows[idxGstReg]);
3262 }
3263
3264 /* Here is the actual endif label, fixup the above jump to land here. */
3265 iemNativeFixupFixedJump(pReNative, offFixup1, off);
3266 }
3267#endif
3268
3269 /*
3270 * Check variables next. For now we must require them to be identical
3271 * or stuff we can recreate. (No code is emitted here.)
3272 */
3273 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
3274#ifdef VBOX_STRICT
3275 uint32_t const offAssert = off;
3276#endif
3277 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
3278 if (fVars)
3279 {
3280 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
3281 do
3282 {
3283 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
3284 fVars &= ~RT_BIT_32(idxVar);
3285
3286 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
3287 {
3288 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
3289 continue;
3290 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3291 {
3292 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3293 if (idxHstReg != UINT8_MAX)
3294 {
3295 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3296 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3297 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
3298 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3299 }
3300 continue;
3301 }
3302 }
3303 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
3304 continue;
3305
3306 /* Irreconcilable, so drop it. */
3307 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3308 if (idxHstReg != UINT8_MAX)
3309 {
3310 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3311 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3312 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
3313 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3314 }
3315 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3316 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
3317 } while (fVars);
3318 }
3319 Assert(off == offAssert);
3320
3321 /*
3322 * Finally, check that the host register allocations match.
3323 */
3324 AssertMsgStmt((pReNative->Core.bmHstRegs & (pReNative->Core.bmHstRegs ^ pOther->bmHstRegs)) == 0,
3325 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
3326 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
3327 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3328 }
3329 }
3330
3331 /*
3332 * Define the endif label and maybe the else one if we're still in the 'if' part.
3333 */
3334 if (!fDefinedLabels)
3335 {
3336 if (!pEntry->fInElse)
3337 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3338 else
3339 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3340 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3341 }
3342
3343 /* Pop the conditional stack. */
3344 pReNative->cCondDepth -= 1;
3345
3346 return off;
3347}
3348
3349
3350/**
3351 * Helper function to convert X86_EFL_xxx masks to liveness masks.
3352 *
3353 * The compiler should be able to figure this out at compile time, so sprinkling
3354 * constexpr wherever possible here to nudge it along.
3355 */
3356template<uint32_t const a_fEfl>
3357RT_CONSTEXPR uint64_t iemNativeEflagsToLivenessMask(void)
3358{
3359 return (a_fEfl & ~X86_EFL_STATUS_BITS ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER) : 0)
3360 | (a_fEfl & X86_EFL_CF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF) : 0)
3361 | (a_fEfl & X86_EFL_PF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_PF) : 0)
3362 | (a_fEfl & X86_EFL_AF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_AF) : 0)
3363 | (a_fEfl & X86_EFL_ZF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_ZF) : 0)
3364 | (a_fEfl & X86_EFL_SF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_SF) : 0)
3365 | (a_fEfl & X86_EFL_OF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OF) : 0);
3366}
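
/*
 * Usage sketch (not compiled): status flags map to their individual liveness
 * bits, anything else folds into the single 'other' bit.
 */
#if 0 /* illustrative sketch only */
AssertCompile(   iemNativeEflagsToLivenessMask<X86_EFL_CF | X86_EFL_ZF>()
              == (RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF) | RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_ZF)));
AssertCompile(iemNativeEflagsToLivenessMask<X86_EFL_DF>() == RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER));
#endif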
3367
3368
3369/**
3370 * Helper function to convert a single X86_EFL_xxxx value to bit number.
3371 *
3372 * The compiler should be able to figure this out at compile time, so sprinkling
3373 * constexpr wherever possible here to nudge it along.
3374 */
3375template<uint32_t const a_fEfl>
3376RT_CONSTEXPR unsigned iemNativeEflagsToSingleBitNo(void)
3377{
3378 AssertCompile( a_fEfl == X86_EFL_CF
3379 || a_fEfl == X86_EFL_PF
3380 || a_fEfl == X86_EFL_AF
3381 || a_fEfl == X86_EFL_ZF
3382 || a_fEfl == X86_EFL_SF
3383 || a_fEfl == X86_EFL_OF
3384 || a_fEfl == X86_EFL_DF);
3385 return a_fEfl == X86_EFL_CF ? X86_EFL_CF_BIT
3386 : a_fEfl == X86_EFL_PF ? X86_EFL_PF_BIT
3387 : a_fEfl == X86_EFL_AF ? X86_EFL_AF_BIT
3388 : a_fEfl == X86_EFL_ZF ? X86_EFL_ZF_BIT
3389 : a_fEfl == X86_EFL_SF ? X86_EFL_SF_BIT
3390 : a_fEfl == X86_EFL_OF ? X86_EFL_OF_BIT
3391 : X86_EFL_DF_BIT;
3392}
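
/*
 * Usage sketch (not compiled): resolves a single flag mask to its bit number
 * at compile time, as used by the IEM_MC_IF_EFL_BIT_SET expansion below.
 */
#if 0 /* illustrative sketch only */
AssertCompile(iemNativeEflagsToSingleBitNo<X86_EFL_ZF>() == X86_EFL_ZF_BIT);
AssertCompile(iemNativeEflagsToSingleBitNo<X86_EFL_CF>() == X86_EFL_CF_BIT);
#endif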
3393
3394
3395#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
3396 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits), iemNativeEflagsToLivenessMask<a_fBits>()); \
3397 do {
3398
3399/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
3400DECL_INLINE_THROW(uint32_t)
3401iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl, uint64_t fLivenessEflBits)
3402{
3403 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fBitsInEfl);
3404 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3405 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3406
3407 /* Get the eflags. */
3408 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ReadOnly, fLivenessEflBits);
3409
3410 /* Test and jump. */
3411 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3412
3413 /* Free but don't flush the EFlags register. */
3414 iemNativeRegFreeTmp(pReNative, idxEflReg);
3415
3416 /* Make a copy of the core state now as we start the if-block. */
3417 iemNativeCondStartIfBlock(pReNative, off);
3418
3419 return off;
3420}
3421
3422
3423#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
3424 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits), iemNativeEflagsToLivenessMask<a_fBits>()); \
3425 do {
3426
3427/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
3428DECL_INLINE_THROW(uint32_t)
3429iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl, uint64_t fLivenessEflBits)
3430{
3431 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fBitsInEfl);
3432 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3433 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3434
3435 /* Get the eflags. */
3436 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ReadOnly, fLivenessEflBits);
3437
3438 /* Test and jump. */
3439 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3440
3441 /* Free but don't flush the EFlags register. */
3442 iemNativeRegFreeTmp(pReNative, idxEflReg);
3443
3444 /* Make a copy of the core state now as we start the if-block. */
3445 iemNativeCondStartIfBlock(pReNative, off);
3446
3447 return off;
3448}
3449
3450
3451#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
3452 off = iemNativeEmitIfEflagsBitSet(pReNative, off, iemNativeEflagsToSingleBitNo<a_fBit>(), \
3453 iemNativeEflagsToLivenessMask<a_fBit>()); \
3454 do {
3455
3456/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
3457DECL_INLINE_THROW(uint32_t)
3458iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, unsigned iBitNo, uint64_t fLivenessEflBit)
3459{
3460 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3461 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3462 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3463
3464 /* Get the eflags. */
3465 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ReadOnly, fLivenessEflBit);
3466
3467 /* Test and jump. */
3468 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3469
3470 /* Free but don't flush the EFlags register. */
3471 iemNativeRegFreeTmp(pReNative, idxEflReg);
3472
3473 /* Make a copy of the core state now as we start the if-block. */
3474 iemNativeCondStartIfBlock(pReNative, off);
3475
3476 return off;
3477}
3478
3479
3480#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
3481 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, iemNativeEflagsToSingleBitNo<a_fBit>(), \
3482 iemNativeEflagsToLivenessMask<a_fBit>()); \
3483 do {
3484
3485/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
3486DECL_INLINE_THROW(uint32_t)
3487iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, unsigned iBitNo, uint64_t fLivenessEflBit)
3488{
3489 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3490 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3491 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3492
3493 /* Get the eflags. */
3494 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ReadOnly, fLivenessEflBit);
3495
3496 /* Test and jump. */
3497 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3498
3499 /* Free but don't flush the EFlags register. */
3500 iemNativeRegFreeTmp(pReNative, idxEflReg);
3501
3502 /* Make a copy of the core state now as we start the if-block. */
3503 iemNativeCondStartIfBlock(pReNative, off);
3504
3505 return off;
3506}
3507
3508
3509#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
3510 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, false /*fInverted*/, \
3511 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3512 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3513 iemNativeEflagsToLivenessMask<a_fBit1 | a_fBit2>()); \
3514 do {
3515
3516#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
3517 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, true /*fInverted*/, \
3518 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3519 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3520 iemNativeEflagsToLivenessMask<a_fBit1 | a_fBit2>()); \
3521 do {
3522
3523/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
3524DECL_INLINE_THROW(uint32_t)
3525iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3526 bool fInverted, unsigned iBitNo1, unsigned iBitNo2, uint64_t fLivenessEflBits)
3527{
3528 Assert(iBitNo1 != iBitNo2);
3529 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3530 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3531 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3532
3533 /* Get the eflags. */
3534 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ReadOnly, fLivenessEflBits);
3535
3536#ifdef RT_ARCH_AMD64
3537 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, RT_BIT_64(iBitNo1));
3538
3539 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3540 if (iBitNo1 > iBitNo2)
3541 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3542 else
3543 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3544 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3545
3546#elif defined(RT_ARCH_ARM64)
3547 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3548 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3549
3550 /* and tmpreg, eflreg, #1<<iBitNo1 */
3551 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3552
3553 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3554 if (iBitNo1 > iBitNo2)
3555 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3556 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3557 else
3558 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3559 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3560
3561 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3562
3563#else
3564# error "Port me"
3565#endif
3566
3567 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3568 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3569 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3570
3571 /* Free but don't flush the EFlags and tmp registers. */
3572 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3573 iemNativeRegFreeTmp(pReNative, idxEflReg);
3574
3575 /* Make a copy of the core state now as we start the if-block. */
3576 iemNativeCondStartIfBlock(pReNative, off);
3577
3578 return off;
3579}
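
/*
 * Plain-C sketch of the and/shift/xor trick used above (hypothetical helper
 * name, not compiled): after isolating bit iBitNo1 and shifting it onto bit
 * iBitNo2, the XOR with EFLAGS leaves bit iBitNo2 set exactly when the two
 * flag bits differ.
 */
#if 0 /* illustrative sketch only */
static bool SketchEflagsBitsDiffer(uint32_t fEfl, unsigned iBitNo1, unsigned iBitNo2)
{
    uint32_t uTmp = fEfl & RT_BIT_32(iBitNo1);
    uTmp = iBitNo1 > iBitNo2 ? uTmp >> (iBitNo1 - iBitNo2) : uTmp << (iBitNo2 - iBitNo1);
    uTmp ^= fEfl;
    return (uTmp & RT_BIT_32(iBitNo2)) != 0; /* set -> not equal (IEM_MC_IF_EFL_BITS_NE) */
}
#endif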
3580
3581
3582#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
3583 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, false /*fInverted*/, \
3584 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3585 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3586 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3587 iemNativeEflagsToLivenessMask<a_fBit | a_fBit1 | a_fBit2>()); \
3588 do {
3589
3590#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
3591 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, true /*fInverted*/, \
3592 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3593 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3594 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3595 iemNativeEflagsToLivenessMask<a_fBit | a_fBit1 | a_fBit2>()); \
3596 do {
3597
3598/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
3599 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
3600DECL_INLINE_THROW(uint32_t)
3601iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fInverted,
3602 unsigned iBitNo, unsigned iBitNo1, unsigned iBitNo2, uint64_t fLivenessEflBits)
3603{
3604 Assert(iBitNo1 != iBitNo);
3605 Assert(iBitNo2 != iBitNo);
3606 Assert(iBitNo2 != iBitNo1);
3607 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo) | RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3608 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo) | RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3609 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3610
3611 /* We need an if-block label for the inverted variant. */
3612 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
3613 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
3614
3615 /* Get the eflags. */
3616 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ReadOnly, fLivenessEflBits);
3617
3618#ifdef RT_ARCH_AMD64
3619 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, RT_BIT_64(iBitNo1)); /* This must come before we jump anywhere! */
3620#elif defined(RT_ARCH_ARM64)
3621 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3622#endif
3623
3624 /* Check for the lone bit first. */
3625 if (!fInverted)
3626 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3627 else
3628 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
3629
3630 /* Then extract and compare the other two bits. */
3631#ifdef RT_ARCH_AMD64
3632 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3633 if (iBitNo1 > iBitNo2)
3634 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3635 else
3636 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3637 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3638
3639#elif defined(RT_ARCH_ARM64)
3640 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3641
3642 /* and tmpreg, eflreg, #1<<iBitNo1 */
3643 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3644
3645 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3646 if (iBitNo1 > iBitNo2)
3647 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3648 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3649 else
3650 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3651 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3652
3653 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3654
3655#else
3656# error "Port me"
3657#endif
3658
3659 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3660 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3661 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3662
3663 /* Free but don't flush the EFlags and tmp registers. */
3664 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3665 iemNativeRegFreeTmp(pReNative, idxEflReg);
3666
3667 /* Make a copy of the core state now as we start the if-block. */
3668 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
3669
3670 return off;
3671}
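/*
 * Editor's note (illustrative addition): a scalar model of the combined
 * condition recompiled above.  With a_fBit=X86_EFL_ZF, a_fBit1=X86_EFL_SF and
 * a_fBit2=X86_EFL_OF this is the JG-style predicate (ZF clear and SF == OF),
 * and fInverted selects the complementary JLE-style form.  Helper name and
 * #if 0 guard are ours; sketch only.
 */
#if 0
DECLINLINE(bool) iemExampleEflBitClearAndBitsEqual(uint32_t fEfl, unsigned iBitNo,
                                                   unsigned iBitNo1, unsigned iBitNo2, bool fInverted)
{
    bool const fResult = !(fEfl & RT_BIT_32(iBitNo))
                      && ((fEfl >> iBitNo1) & 1) == ((fEfl >> iBitNo2) & 1);
    return fInverted ? !fResult : fResult;
}
#endif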
3672
3673
3674#define IEM_MC_IF_CX_IS_NZ() \
3675 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
3676 do {
3677
3678/** Emits code for IEM_MC_IF_CX_IS_NZ. */
3679DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3680{
3681 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3682
3683 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3684 kIemNativeGstRegUse_ReadOnly);
3685 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
3686 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3687
3688 iemNativeCondStartIfBlock(pReNative, off);
3689 return off;
3690}
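/*
 * Editor's note (illustrative addition): the UINT16_MAX mask above restricts
 * the "any bits set" test to CX, i.e. the low 16 bits of rCX.  Scalar model
 * (helper name and #if 0 guard are ours):
 */
#if 0
DECLINLINE(bool) iemExampleCxIsNotZero(uint64_t uRcx)
{
    return (uRcx & UINT16_MAX) != 0;
}
#endif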
3691
3692
3693#define IEM_MC_IF_ECX_IS_NZ() \
3694 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
3695 do {
3696
3697#define IEM_MC_IF_RCX_IS_NZ() \
3698 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
3699 do {
3700
3701/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
3702DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3703{
3704 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3705
3706 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3707 kIemNativeGstRegUse_ReadOnly);
3708 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
3709 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3710
3711 iemNativeCondStartIfBlock(pReNative, off);
3712 return off;
3713}
3714
3715
3716#define IEM_MC_IF_CX_IS_NOT_ONE() \
3717 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
3718 do {
3719
3720/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
3721DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3722{
3723 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3724
3725 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3726 kIemNativeGstRegUse_ReadOnly);
3727#ifdef RT_ARCH_AMD64
3728 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3729#else
3730 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3731 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3732 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3733#endif
3734 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3735
3736 iemNativeCondStartIfBlock(pReNative, off);
3737 return off;
3738}
3739
3740
3741#define IEM_MC_IF_ECX_IS_NOT_ONE() \
3742 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
3743 do {
3744
3745#define IEM_MC_IF_RCX_IS_NOT_ONE() \
3746 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
3747 do {
3748
3749/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
3750DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3751{
3752 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3753
3754 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3755 kIemNativeGstRegUse_ReadOnly);
3756 if (f64Bit)
3757 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3758 else
3759 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3760 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3761
3762 iemNativeCondStartIfBlock(pReNative, off);
3763 return off;
3764}
3765
3766
3767#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3768 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, \
3769 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3770 iemNativeEflagsToLivenessMask<a_fBit>()); \
3771 do {
3772
3773#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3774 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, \
3775 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3776 iemNativeEflagsToLivenessMask<a_fBit>()); \
3777 do {
3778
3779/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3780 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3781DECL_INLINE_THROW(uint32_t)
3782iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3783 bool fCheckIfSet, unsigned iBitNo, uint64_t fLivenessEflBit)
3784{
3785 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3786 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3787 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3788
3789 /* We have to load both RCX and EFLAGS before we can start branching;
3790 otherwise we'll end up in the else-block with an inconsistent
3791 register allocator state.
3792 Doing EFLAGS first as it's more likely to be loaded, right? */
3793 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ReadOnly, fLivenessEflBit);
3794 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3795 kIemNativeGstRegUse_ReadOnly);
3796
3797 /** @todo we could reduce this to a single branch instruction by spending a
3798 * temporary register and some setnz stuff. Not sure if loops are
3799 * worth it. */
3800 /* Check CX. */
3801#ifdef RT_ARCH_AMD64
3802 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3803#else
3804 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3805 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3806 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3807#endif
3808
3809 /* Check the EFlags bit. */
3810 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3811 !fCheckIfSet /*fJmpIfSet*/);
3812
3813 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3814 iemNativeRegFreeTmp(pReNative, idxEflReg);
3815
3816 iemNativeCondStartIfBlock(pReNative, off);
3817 return off;
3818}
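/*
 * Editor's note (illustrative addition): scalar model of the branch condition
 * above - enter the if-block when CX (the low 16 bits of rCX) differs from 1
 * and the selected EFLAGS bit matches fCheckIfSet; presumably the shape of
 * test needed by LOOPcc-style code.  Helper name and #if 0 guard are ours.
 */
#if 0
DECLINLINE(bool) iemExampleCxNotOneAndEflBit(uint64_t uRcx, uint32_t fEfl, unsigned iBitNo, bool fCheckIfSet)
{
    return (uint16_t)uRcx != 1
        && RT_BOOL(fEfl & RT_BIT_32(iBitNo)) == fCheckIfSet;
}
#endif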
3819
3820
3821#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3822 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, false /*f64Bit*/, \
3823 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3824 iemNativeEflagsToLivenessMask<a_fBit>()); \
3825 do {
3826
3827#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3828 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, false /*f64Bit*/, \
3829 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3830 iemNativeEflagsToLivenessMask<a_fBit>()); \
3831 do {
3832
3833#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3834 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, true /*f64Bit*/, \
3835 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3836 iemNativeEflagsToLivenessMask<a_fBit>()); \
3837 do {
3838
3839#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3840 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, true /*f64Bit*/, \
3841 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3842 iemNativeEflagsToLivenessMask<a_fBit>()); \
3843 do {
3844
3845/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3846 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3847 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3848 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3849DECL_INLINE_THROW(uint32_t)
3850iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fCheckIfSet, bool f64Bit,
3851 unsigned iBitNo, uint64_t fLivenessEFlBit)
3852
3853{
3854 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3855 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3856 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3857
3858 /* We have to load both RCX and EFLAGS before we can start branching;
3859 otherwise we'll end up in the else-block with an inconsistent
3860 register allocator state.
3861 Doing EFLAGS first as it's more likely to be loaded, right? */
3862 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ReadOnly, fLivenessEFlBit);
3863 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3864 kIemNativeGstRegUse_ReadOnly);
3865
3866 /** @todo we could reduce this to a single branch instruction by spending a
3867 * temporary register and some setnz stuff. Not sure if loops are
3868 * worth it. */
3869 /* Check RCX/ECX. */
3870 if (f64Bit)
3871 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3872 else
3873 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3874
3875 /* Check the EFlags bit. */
3876 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3877 !fCheckIfSet /*fJmpIfSet*/);
3878
3879 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3880 iemNativeRegFreeTmp(pReNative, idxEflReg);
3881
3882 iemNativeCondStartIfBlock(pReNative, off);
3883 return off;
3884}
3885
3886
3887#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3888 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3889 do {
3890
3891/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3892DECL_INLINE_THROW(uint32_t)
3893iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3894{
3895 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3896
3897 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3898 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3899 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3900 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3901
3902 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3903
3904 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3905
3906 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3907
3908 iemNativeCondStartIfBlock(pReNative, off);
3909 return off;
3910}
3911
3912
3913#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3914 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3915 do {
3916
3917/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3918DECL_INLINE_THROW(uint32_t)
3919iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3920{
3921 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3922 Assert(iGReg < 16);
3923
3924 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3925 kIemNativeGstRegUse_ReadOnly);
3926
3927 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3928
3929 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3930
3931 iemNativeCondStartIfBlock(pReNative, off);
3932 return off;
3933}
3934
3935
3936
3937/*********************************************************************************************************************************
3938* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3939*********************************************************************************************************************************/
3940
3941#define IEM_MC_NOREF(a_Name) \
3942 RT_NOREF_PV(a_Name)
3943
3944#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3945 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3946
3947#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3948 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3949
3950#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3951 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3952
3953#define IEM_MC_LOCAL(a_Type, a_Name) \
3954 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3955
3956#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3957 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3958
3959#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3960 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3961
3962
3963/**
3964 * Sets the host register for @a idxVarRc to @a idxReg.
3965 *
3966 * Any guest register shadowing will be implicitly dropped by this call.
3967 *
3968 * The variable must not have any register associated with it (causes
3969 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3970 * implied.
3971 *
3972 * @returns idxReg
3973 * @param pReNative The recompiler state.
3974 * @param idxVar The variable.
3975 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3976 * @param off For recording in debug info.
3977 * @param fAllocated Set if the register is already allocated, false if not.
3978 *
3979 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3980 */
3981DECL_INLINE_THROW(uint8_t)
3982iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off, bool fAllocated)
3983{
3984 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3985 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3986 Assert(!pVar->fRegAcquired);
3987 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3988 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3989 AssertStmt(RT_BOOL(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)) == fAllocated,
3990 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3991
3992 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3993 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3994
3995 iemNativeVarSetKindToStack(pReNative, idxVar);
3996 pVar->idxReg = idxReg;
3997
3998 return idxReg;
3999}
4000
4001
4002/**
4003 * Convenience wrapper around iemNativeVarRegisterSet that also marks the variable's register as acquired.
4004 */
4005DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
4006 uint8_t idxReg, uint32_t *poff)
4007{
4008 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff, false /*fAllocated*/);
4009 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
4010 return idxReg;
4011}
4012
4013
4014/**
4015 * This is called by IEM_MC_END() to clean up all variables.
4016 */
4017DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
4018{
4019 uint32_t const bmVars = pReNative->Core.bmVars;
4020 if (bmVars != 0)
4021 iemNativeVarFreeAllSlow(pReNative, bmVars);
4022 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
4023 Assert(pReNative->Core.bmStack == 0);
4024}
4025
4026
4027#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
4028
4029/**
4030 * This is called by IEM_MC_FREE_LOCAL.
4031 */
4032DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4033{
4034 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4035 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
4036 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
4037}
4038
4039
4040#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
4041
4042/**
4043 * This is called by IEM_MC_FREE_ARG.
4044 */
4045DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4046{
4047 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4048 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
4049 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
4050}
4051
4052
4053#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
4054
4055/**
4056 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
4057 */
4058DECL_INLINE_THROW(uint32_t)
4059iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
4060{
4061 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
4062 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
4063 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4064 Assert( pVarDst->cbVar == sizeof(uint16_t)
4065 || pVarDst->cbVar == sizeof(uint32_t));
4066
4067 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
4068 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
4069 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
4070 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
4071 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4072
4073 Assert(pVarDst->cbVar < pVarSrc->cbVar);
4074
4075 /*
4076 * Special case for immediates.
4077 */
4078 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
4079 {
4080 switch (pVarDst->cbVar)
4081 {
4082 case sizeof(uint16_t):
4083 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
4084 break;
4085 case sizeof(uint32_t):
4086 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
4087 break;
4088 default: AssertFailed(); break;
4089 }
4090 }
4091 else
4092 {
4093 /*
4094 * The generic solution for now.
4095 */
4096 /** @todo optimize this by having the python script make sure the source
4097 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
4098 * statement. Then we could just transfer the register assignments. */
4099 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
4100 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
4101 switch (pVarDst->cbVar)
4102 {
4103 case sizeof(uint16_t):
4104 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
4105 break;
4106 case sizeof(uint32_t):
4107 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
4108 break;
4109 default: AssertFailed(); break;
4110 }
4111 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
4112 iemNativeVarRegisterRelease(pReNative, idxVarDst);
4113 }
4114 return off;
4115}
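/*
 * Editor's note (illustrative addition): at runtime IEM_MC_ASSIGN_TO_SMALLER
 * amounts to a plain truncation of the wider source to the 16- or 32-bit
 * destination, which is all the immediate and register paths above implement.
 * Sketch only (#if 0 guard and names are ours):
 */
#if 0
static void iemExampleAssignToSmaller(void)
{
    uint64_t const u64Src = UINT64_C(0x1122334455667788);
    uint32_t const u32Dst = (uint32_t)u64Src;   /* 0x55667788 */
    uint16_t const u16Dst = (uint16_t)u64Src;   /* 0x7788 */
    RT_NOREF(u32Dst, u16Dst);
}
#endif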
4116
4117
4118
4119/*********************************************************************************************************************************
4120* Emitters for IEM_MC_CALL_CIMPL_XXX *
4121*********************************************************************************************************************************/
4122
4123/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
4124DECL_INLINE_THROW(uint32_t)
4125iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
4126 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
4127
4128{
4129 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
4130 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
4131
4132#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4133 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
4134 when a call clobbers any of the relevant control registers. */
4135# if 1
4136 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
4137 {
4138 /* Likely as long as call+ret are done via cimpl. */
4139 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
4140 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
4141 }
4142 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
4143 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4144 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
4145 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4146 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
4147 else
4148 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4149 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4150 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4151
4152# else
4153 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
4154 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4155 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
4156 pReNative->fSimdRaiseXcptChecksEmitted = 0;
4157 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
4158 || pfnCImpl == (uintptr_t)iemCImpl_callf
4159 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
4160 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
4161 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4162 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4163 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4164# endif
4165
4166# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
4167 /* Mark the host floating point control register as not synced if MXCSR is modified. */
4168 if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_MxCsr))
4169 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
4170# endif
4171#endif
4172
4173 /*
4174 * Do all the call setup and cleanup.
4175 */
4176 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
4177
4178 /*
4179 * Load the two or three hidden arguments.
4180 */
4181#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4182 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4183 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4184 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
4185#else
4186 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4187 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
4188#endif
4189
4190 /*
4191 * Make the call and check the return code.
4192 *
4193 * Shadow PC copies are always flushed here; other stuff depends on the flags.
4194 * Segment and general purpose registers are explicitly flushed via the
4195 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
4196 * macros.
4197 */
4198 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
4199#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4200 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4201#endif
4202 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
4203 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
4204 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4205 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4206
4207#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
4208 pReNative->Core.fDebugPcInitialized = false;
4209 Log4(("fDebugPcInitialized=false cimpl off=%#x (v1)\n", off));
4210#endif
4211
4212 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4213}
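/*
 * Editor's note (illustrative addition): the shape of the function being
 * called here, as implied by the hidden-argument loads above - pVCpu and the
 * instruction length come first, followed by the user arguments (with the
 * Windows/AMD64 strict variant returning VBOXSTRICTRC via the shadow buffer
 * handled above).  The typedef name and single uint64_t argument are ours;
 * the real declarations live elsewhere in IEM.
 */
#if 0
typedef VBOXSTRICTRC FNIEMCIMPLEXAMPLE1(PVMCPUCC pVCpu, uint8_t cbInstr, uint64_t uArg0);
#endif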
4214
4215
4216#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4217 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
4218
4219/** Emits code for IEM_MC_CALL_CIMPL_1. */
4220DECL_INLINE_THROW(uint32_t)
4221iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4222 uintptr_t pfnCImpl, uint8_t idxArg0)
4223{
4224 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4225 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
4226}
4227
4228
4229#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4230 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
4231
4232/** Emits code for IEM_MC_CALL_CIMPL_2. */
4233DECL_INLINE_THROW(uint32_t)
4234iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4235 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
4236{
4237 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4238 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4239 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
4240}
4241
4242
4243#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4244 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4245 (uintptr_t)a_pfnCImpl, a0, a1, a2)
4246
4247/** Emits code for IEM_MC_CALL_CIMPL_3. */
4248DECL_INLINE_THROW(uint32_t)
4249iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4250 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4251{
4252 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4253 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4254 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4255 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
4256}
4257
4258
4259#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
4260 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4261 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
4262
4263/** Emits code for IEM_MC_CALL_CIMPL_4. */
4264DECL_INLINE_THROW(uint32_t)
4265iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4266 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4267{
4268 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4269 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4270 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4271 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4272 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
4273}
4274
4275
4276#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
4277 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4278 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
4279
4280 /** Emits code for IEM_MC_CALL_CIMPL_5. */
4281DECL_INLINE_THROW(uint32_t)
4282iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4283 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
4284{
4285 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4286 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4287 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4288 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4289 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
4290 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
4291}
4292
4293
4294/** Recompiler debugging: Flush guest register shadow copies. */
4295#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
4296
4297
4298
4299/*********************************************************************************************************************************
4300* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
4301*********************************************************************************************************************************/
4302
4303/**
4304 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
4305 */
4306DECL_INLINE_THROW(uint32_t)
4307iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4308 uintptr_t pfnAImpl, uint8_t cArgs)
4309{
4310 if (idxVarRc != UINT8_MAX)
4311 {
4312 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
4313 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
4314 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
4315 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
4316 }
4317
4318 /*
4319 * Do all the call setup and cleanup.
4320 *
4321 * It is only required to flush pending guest register writes in call volatile registers as
4322 * assembly helpers can't throw and don't access anything living in CPUMCTX; they only
4323 * access their parameters. The flushing of call volatile registers is always done in iemNativeEmitCallCommon()
4324 * no matter the fFlushPendingWrites parameter.
4325 */
4326 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
4327
4328 /*
4329 * Make the call and update the return code variable if we've got one.
4330 */
4331 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
4332 if (idxVarRc != UINT8_MAX)
4333 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off, false /*fAllocated*/);
4334
4335 return off;
4336}
4337
4338
4339
4340#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
4341 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
4342
4343#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
4344 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
4345
4346/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
4347DECL_INLINE_THROW(uint32_t)
4348iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
4349{
4350 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
4351}
4352
4353
4354#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
4355 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
4356
4357#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
4358 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
4359
4360/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
4361DECL_INLINE_THROW(uint32_t)
4362iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
4363{
4364 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4365 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
4366}
4367
4368
4369#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
4370 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
4371
4372#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
4373 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
4374
4375/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
4376DECL_INLINE_THROW(uint32_t)
4377iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4378 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
4379{
4380 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4381 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4382 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
4383}
4384
4385
4386#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
4387 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
4388
4389#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
4390 IEM_MC_LOCAL(a_rcType, a_rc); \
4391 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
4392
4393/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
4394DECL_INLINE_THROW(uint32_t)
4395iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4396 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4397{
4398 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4399 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4400 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4401 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
4402}
4403
4404
4405#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
4406 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4407
4408#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
4409 IEM_MC_LOCAL(a_rcType, a_rc); \
4410 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4411
4412/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
4413DECL_INLINE_THROW(uint32_t)
4414iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4415 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4416{
4417 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4418 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4419 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4420 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
4421 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
4422}
4423
4424
4425
4426/*********************************************************************************************************************************
4427* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
4428*********************************************************************************************************************************/
4429
4430#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
4431 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
4432
4433#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4434 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
4435
4436#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4437 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
4438
4439#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4440 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
4441
4442
4443/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
4444 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
4445DECL_INLINE_THROW(uint32_t)
4446iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
4447{
4448 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4449 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4450 Assert(iGRegEx < 20);
4451
4452 /* Same discussion as in iemNativeEmitFetchGregU16 */
4453 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4454 kIemNativeGstRegUse_ReadOnly);
4455
4456 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4457 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4458
4459 /* The value is zero-extended to the full 64-bit host register width. */
4460 if (iGRegEx < 16)
4461 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4462 else
4463 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4464
4465 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4466 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4467 return off;
4468}
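/*
 * Editor's note (illustrative addition): scalar model of the extended
 * register index handled above - 0..15 select the low byte of the GPR, while
 * 16..19 select AH/CH/DH/BH (the high byte of registers 0..3); the result is
 * zero-extended just like the emitted code.  Helper name and #if 0 guard are
 * ours.
 */
#if 0
DECLINLINE(uint64_t) iemExampleFetchGreg8(uint64_t const *pauGRegs, uint8_t iGRegEx)
{
    uint64_t const uFull = pauGRegs[iGRegEx & 15];
    return iGRegEx < 16 ? (uint8_t)uFull : (uint8_t)(uFull >> 8);
}
#endif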
4469
4470
4471#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4472 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
4473
4474#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4475 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
4476
4477#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4478 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
4479
4480/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
4481DECL_INLINE_THROW(uint32_t)
4482iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
4483{
4484 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4485 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4486 Assert(iGRegEx < 20);
4487
4488 /* Same discussion as in iemNativeEmitFetchGregU16 */
4489 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4490 kIemNativeGstRegUse_ReadOnly);
4491
4492 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4493 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4494
4495 if (iGRegEx < 16)
4496 {
4497 switch (cbSignExtended)
4498 {
4499 case sizeof(uint16_t):
4500 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4501 break;
4502 case sizeof(uint32_t):
4503 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4504 break;
4505 case sizeof(uint64_t):
4506 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4507 break;
4508 default: AssertFailed(); break;
4509 }
4510 }
4511 else
4512 {
4513 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4514 switch (cbSignExtended)
4515 {
4516 case sizeof(uint16_t):
4517 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4518 break;
4519 case sizeof(uint32_t):
4520 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4521 break;
4522 case sizeof(uint64_t):
4523 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4524 break;
4525 default: AssertFailed(); break;
4526 }
4527 }
4528
4529 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4530 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4531 return off;
4532}
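/*
 * Editor's note (illustrative addition): sign-extending counterpart of the
 * previous sketch - the byte (low or high) is isolated first and then sign-
 * extended to the destination width, matching the two-step high-byte path
 * above.  Helper name and #if 0 guard are ours.
 */
#if 0
DECLINLINE(int64_t) iemExampleFetchGreg8Sx(uint64_t const *pauGRegs, uint8_t iGRegEx)
{
    uint64_t const uFull = pauGRegs[iGRegEx & 15];
    uint8_t  const bByte = iGRegEx < 16 ? (uint8_t)uFull : (uint8_t)(uFull >> 8);
    return (int8_t)bByte; /* sign extension to the wider type happens implicitly */
}
#endif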
4533
4534
4535
4536#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
4537 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
4538
4539#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
4540 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4541
4542#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
4543 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4544
4545/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
4546DECL_INLINE_THROW(uint32_t)
4547iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4548{
4549 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4550 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4551 Assert(iGReg < 16);
4552
4553 /*
4554 * We can either just load the low 16-bit of the GPR into a host register
4555 * for the variable, or we can do so via a shadow copy host register. The
4556 * latter will avoid having to reload it if it's being stored later, but
4557 * will waste a host register if it isn't touched again. Since we don't
4558 * know what's going to happen, we choose the latter for now.
4559 */
4560 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4561 kIemNativeGstRegUse_ReadOnly);
4562
4563 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4564 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4565 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4566 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4567
4568 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4569 return off;
4570}
4571
4572#define IEM_MC_FETCH_GREG_I16(a_i16Dst, a_iGReg) \
4573 off = iemNativeEmitFetchGregI16(pReNative, off, a_i16Dst, a_iGReg)
4574
4575/** Emits code for IEM_MC_FETCH_GREG_I16. */
4576DECL_INLINE_THROW(uint32_t)
4577iemNativeEmitFetchGregI16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4578{
4579 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4580 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(int16_t));
4581 Assert(iGReg < 16);
4582
4583 /*
4584 * We can either just load the low 16-bit of the GPR into a host register
4585 * for the variable, or we can do so via a shadow copy host register. The
4586 * latter will avoid having to reload it if it's being stored later, but
4587 * will waste a host register if it isn't touched again. Since we don't
4588 * know what's going to happen, we choose the latter for now.
4589 */
4590 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4591 kIemNativeGstRegUse_ReadOnly);
4592
4593 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4594 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4595#ifdef RT_ARCH_AMD64
4596 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4597#elif defined(RT_ARCH_ARM64) /* Note! There are no 16-bit registers on ARM, we emulate that through 32-bit registers which requires sign extension. */
4598 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4599#endif
4600 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4601
4602 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4603 return off;
4604}
4605
4606
4607#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
4608 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4609
4610#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
4611 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4612
4613/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
4614DECL_INLINE_THROW(uint32_t)
4615iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
4616{
4617 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4618 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4619 Assert(iGReg < 16);
4620
4621 /*
4622 * We can either just load the low 16-bit of the GPR into a host register
4623 * for the variable, or we can do so via a shadow copy host register. The
4624 * latter will avoid having to reload it if it's being stored later, but
4625 * will waste a host register if it isn't touched again. Since we don't
4626 * know what's going to happen, we choose the latter for now.
4627 */
4628 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4629 kIemNativeGstRegUse_ReadOnly);
4630
4631 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4632 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4633 if (cbSignExtended == sizeof(uint32_t))
4634 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4635 else
4636 {
4637 Assert(cbSignExtended == sizeof(uint64_t));
4638 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4639 }
4640 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4641
4642 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4643 return off;
4644}
4645
4646
4647#define IEM_MC_FETCH_GREG_I32(a_i32Dst, a_iGReg) \
4648 off = iemNativeEmitFetchGregU32(pReNative, off, a_i32Dst, a_iGReg, sizeof(uint32_t))
4649
4650#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
4651 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
4652
4653#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
4654 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
4655
4656 /** Emits code for IEM_MC_FETCH_GREG_U32, IEM_MC_FETCH_GREG_I32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
4657DECL_INLINE_THROW(uint32_t)
4658iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4659{
4660 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4661 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4662 Assert(iGReg < 16);
4663
4664 /*
4665 * We can either just load the low 32-bit of the GPR into a host register
4666 * for the variable, or we can do so via a shadow copy host register. The
4667 * latter will avoid having to reload it if it's being stored later, but
4668 * will waste a host register if it isn't touched again. Since we don't
4669 * know what's going to happen, we choose the latter for now.
4670 */
4671 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4672 kIemNativeGstRegUse_ReadOnly);
4673
4674 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4675 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4676 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4677 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4678
4679 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4680 return off;
4681}
4682
4683
4684#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
4685 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
4686
4687 /** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
4688DECL_INLINE_THROW(uint32_t)
4689iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4690{
4691 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4692 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4693 Assert(iGReg < 16);
4694
4695 /*
4696 * We can either just load the low 32-bit of the GPR into a host register
4697 * for the variable, or we can do so via a shadow copy host register. The
4698 * latter will avoid having to reload it if it's being stored later, but
4699 * will waste a host register if it isn't touched again. Since we don't
4700 * know what's going to happen, we choose the latter for now.
4701 */
4702 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4703 kIemNativeGstRegUse_ReadOnly);
4704
4705 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4706 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4707 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4708 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4709
4710 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4711 return off;
4712}
4713
4714
4715#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
4716 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4717
4718#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
4719 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4720
4721/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
4722 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
4723DECL_INLINE_THROW(uint32_t)
4724iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4725{
4726 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4727 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4728 Assert(iGReg < 16);
4729
4730 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4731 kIemNativeGstRegUse_ReadOnly);
4732
4733 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4734 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4735 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
4736 /** @todo name the register a shadow one already? */
4737 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4738
4739 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4740 return off;
4741}
4742
4743
4744#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4745#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
4746 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
4747
4748/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
4749DECL_INLINE_THROW(uint32_t)
4750iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
4751{
4752 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4753 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4754 Assert(iGRegLo < 16 && iGRegHi < 16);
4755
4756 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4757 kIemNativeGstRegUse_ReadOnly);
4758 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4759 kIemNativeGstRegUse_ReadOnly);
4760
4761 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4762 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
4763 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
4764 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
4765
4766 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4767 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4768 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4769 return off;
4770}
4771#endif
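/*
 * Editor's note (illustrative addition): the two 64-bit lane stores above
 * amount to packing the low register into bits 63:0 and the high register
 * into bits 127:64 of the 128-bit variable.  The struct and helper below are
 * ours (sketch only, #if 0 guarded).
 */
#if 0
typedef struct IEMEXAMPLEU128 { uint64_t au64[2]; } IEMEXAMPLEU128;
DECLINLINE(IEMEXAMPLEU128) iemExampleFetchGregPair(uint64_t uLo, uint64_t uHi)
{
    IEMEXAMPLEU128 Ret;
    Ret.au64[0] = uLo;  /* lane 0 */
    Ret.au64[1] = uHi;  /* lane 1 */
    return Ret;
}
#endif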
4772
4773
4774/*********************************************************************************************************************************
4775* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
4776*********************************************************************************************************************************/
4777
4778#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
4779 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
4780
4781/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
4782DECL_INLINE_THROW(uint32_t)
4783iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
4784{
4785 Assert(iGRegEx < 20);
4786 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4787 kIemNativeGstRegUse_ForUpdate);
4788#ifdef RT_ARCH_AMD64
4789 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4790
4791 /* To the lowest byte of the register: mov r8, imm8 */
4792 if (iGRegEx < 16)
4793 {
4794 if (idxGstTmpReg >= 8)
4795 pbCodeBuf[off++] = X86_OP_REX_B;
4796 else if (idxGstTmpReg >= 4)
4797 pbCodeBuf[off++] = X86_OP_REX;
4798 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4799 pbCodeBuf[off++] = u8Value;
4800 }
4801 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
4802 else if (idxGstTmpReg < 4)
4803 {
4804 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4805 pbCodeBuf[off++] = u8Value;
4806 }
4807 else
4808 {
4809 /* ror reg64, 8 */
4810 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4811 pbCodeBuf[off++] = 0xc1;
4812 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4813 pbCodeBuf[off++] = 8;
4814
4815 /* mov reg8, imm8 */
4816 if (idxGstTmpReg >= 8)
4817 pbCodeBuf[off++] = X86_OP_REX_B;
4818 else if (idxGstTmpReg >= 4)
4819 pbCodeBuf[off++] = X86_OP_REX;
4820 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4821 pbCodeBuf[off++] = u8Value;
4822
4823 /* rol reg64, 8 */
4824 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4825 pbCodeBuf[off++] = 0xc1;
4826 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4827 pbCodeBuf[off++] = 8;
4828 }
4829
4830#elif defined(RT_ARCH_ARM64)
4831 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4832 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4833 if (iGRegEx < 16)
4834 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4835 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4836 else
4837 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4838 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4839 iemNativeRegFreeTmp(pReNative, idxImmReg);
4840
4841#else
4842# error "Port me!"
4843#endif
4844
4845 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4846
4847#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4848 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4849#endif
4850
4851 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4852 return off;
4853}
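/*
 * Editor's note (illustrative addition): scalar model of the ror/mov/rol
 * trick above - rotating right by 8 brings bits 15:8 into the low byte, the
 * low byte is replaced, and rotating left by 8 restores everything else,
 * i.e. a store to the AH/CH/DH/BH position when the register cannot be
 * addressed as a high-byte register directly (a REX prefix turns those
 * encodings into spl/bpl/sil/dil).  Helper name and #if 0 guard are ours.
 */
#if 0
DECLINLINE(uint64_t) iemExampleStoreHighByte(uint64_t uGstReg, uint8_t bValue)
{
    uGstReg = (uGstReg >> 8) | (uGstReg << 56);     /* ror reg64, 8 */
    uGstReg = (uGstReg & ~(uint64_t)0xff) | bValue; /* mov reg8, imm8 */
    uGstReg = (uGstReg << 8) | (uGstReg >> 56);     /* rol reg64, 8 */
    return uGstReg;
}
#endif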
4854
4855
4856#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4857 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4858
4859/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4860DECL_INLINE_THROW(uint32_t)
4861iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4862{
4863 Assert(iGRegEx < 20);
4864 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4865
4866 /*
4867 * If it's a constant value (unlikely) we treat this as a
4868 * IEM_MC_STORE_GREG_U8_CONST statement.
4869 */
4870 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4871 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4872 { /* likely */ }
4873 else
4874 {
4875 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4876 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4877 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4878 }
4879
4880 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4881 kIemNativeGstRegUse_ForUpdate);
4882 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4883
4884#ifdef RT_ARCH_AMD64
4885 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4886 if (iGRegEx < 16)
4887 {
4888 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4889 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4890 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4891 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4892 pbCodeBuf[off++] = X86_OP_REX;
4893 pbCodeBuf[off++] = 0x8a;
4894 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4895 }
4896 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
4897 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4898 {
4899 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4900 pbCodeBuf[off++] = 0x8a;
4901 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4902 }
4903 else
4904 {
4905 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4906
4907 /* ror reg64, 8 */
4908 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4909 pbCodeBuf[off++] = 0xc1;
4910 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4911 pbCodeBuf[off++] = 8;
4912
4913 /* mov reg8, reg8(r/m) */
4914 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4915 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4916 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4917 pbCodeBuf[off++] = X86_OP_REX;
4918 pbCodeBuf[off++] = 0x8a;
4919 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4920
4921 /* rol reg64, 8 */
4922 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4923 pbCodeBuf[off++] = 0xc1;
4924 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4925 pbCodeBuf[off++] = 8;
4926 }
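 /* Illustrative example (not from the original source): when both the guest copy
 and the value variable happen to sit in legacy low host registers, say the
 guest copy in rax and the value in rcx, the fast path above emits just
 8a e1 mov ah, cl
 by encoding the destination byte register as idxGstTmpReg + 4 (AH/CH/DH/BH). */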
4927
4928#elif defined(RT_ARCH_ARM64)
4929 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4930 or
4931 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4932 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4933 if (iGRegEx < 16)
4934 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4935 else
4936 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4937
4938#else
4939# error "Port me!"
4940#endif
4941 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4942
4943 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4944
4945#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4946 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4947#endif
4948 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4949 return off;
4950}
4951
4952
4953
4954#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4955 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4956
4957/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4958DECL_INLINE_THROW(uint32_t)
4959iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4960{
4961 Assert(iGReg < 16);
4962 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4963 kIemNativeGstRegUse_ForUpdate);
4964#ifdef RT_ARCH_AMD64
4965 /* mov reg16, imm16 */
4966 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4967 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4968 if (idxGstTmpReg >= 8)
4969 pbCodeBuf[off++] = X86_OP_REX_B;
4970 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4971 pbCodeBuf[off++] = RT_BYTE1(uValue);
4972 pbCodeBuf[off++] = RT_BYTE2(uValue);
4973
4974#elif defined(RT_ARCH_ARM64)
4975 /* movk xdst, #uValue, lsl #0 */
4976 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4977 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4978
4979#else
4980# error "Port me!"
4981#endif
4982
4983 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4984
4985#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4986 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4987#endif
4988 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4989 return off;
4990}
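/* Illustrative example (not from the original source): for IEM_MC_STORE_GREG_U16_CONST
 * with the guest CX copy held in host r10 and uValue = 0x1234, the AMD64 path above
 * would emit
 *      66 41 ba 34 12      mov r10w, 0x1234
 * while the ARM64 path emits a single movk that replaces bits 15:0 of the 64-bit
 * host register and leaves the upper bits untouched. */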
4991
4992
4993#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4994 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4995
4996/** Emits code for IEM_MC_STORE_GREG_U16. */
4997DECL_INLINE_THROW(uint32_t)
4998iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4999{
5000 Assert(iGReg < 16);
5001 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5002
5003 /*
5004 * If it's a constant value (unlikely) we treat this as a
5005 * IEM_MC_STORE_GREG_U16_CONST statement.
5006 */
5007 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5008 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5009 { /* likely */ }
5010 else
5011 {
5012 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5013 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5014 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
5015 }
5016
5017 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5018 kIemNativeGstRegUse_ForUpdate);
5019
5020#ifdef RT_ARCH_AMD64
5021 /* mov reg16, reg16 or [mem16] */
5022 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
5023 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5024 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5025 {
5026 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
5027 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
5028 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
5029 pbCodeBuf[off++] = 0x8b;
5030 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
5031 }
5032 else
5033 {
5034 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
5035 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
5036 if (idxGstTmpReg >= 8)
5037 pbCodeBuf[off++] = X86_OP_REX_R;
5038 pbCodeBuf[off++] = 0x8b;
5039 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
5040 }
5041
5042#elif defined(RT_ARCH_ARM64)
5043 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
5044 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
5045 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5046 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
5047 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5048
5049#else
5050# error "Port me!"
5051#endif
5052
5053 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5054
5055#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5056 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5057#endif
5058 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5059 return off;
5060}
5061
5062
5063#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
5064 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
5065
5066/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
5067DECL_INLINE_THROW(uint32_t)
5068iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
5069{
5070 Assert(iGReg < 16);
5071 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5072 kIemNativeGstRegUse_ForFullWrite);
5073 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5074#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5075 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5076#endif
5077 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5078 return off;
5079}
5080
5081
5082#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
5083 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
5084
5085#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
5086 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
5087
5088/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
5089DECL_INLINE_THROW(uint32_t)
5090iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5091{
5092 Assert(iGReg < 16);
5093 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5094
5095 /*
5096 * If it's a constant value (unlikely) we treat this as a
5097 * IEM_MC_STORE_GREG_U32_CONST statement.
5098 */
5099 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5100 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5101 { /* likely */ }
5102 else
5103 {
5104 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5105 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5106 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
5107 }
5108
5109 /*
5110 * For the rest we allocate a guest register for the variable and write
5111 * it to the CPUMCTX structure.
5112 */
5113 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5114#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5115 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5116#else
5117 RT_NOREF(idxVarReg);
5118#endif
5119#ifdef VBOX_STRICT
5120 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
5121#endif
5122 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5123 return off;
5124}
5125
5126
5127#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
5128 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
5129
5130/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
5131DECL_INLINE_THROW(uint32_t)
5132iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
5133{
5134 Assert(iGReg < 16);
5135 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5136 kIemNativeGstRegUse_ForFullWrite);
5137 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5138#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5139 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5140#endif
5141 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5142 return off;
5143}
5144
5145
5146#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
5147 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
5148
5149#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
5150 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
5151
5152/** Emits code for IEM_MC_STORE_GREG_U64. */
5153DECL_INLINE_THROW(uint32_t)
5154iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5155{
5156 Assert(iGReg < 16);
5157 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5158
5159 /*
5160 * If it's a constant value (unlikely) we treat this as a
5161 * IEM_MC_STORE_GREG_U64_CONST statement.
5162 */
5163 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5164 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5165 { /* likely */ }
5166 else
5167 {
5168 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5169 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5170 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
5171 }
5172
5173 /*
5174 * For the rest we allocate a guest register for the variable and write
5175 * it to the CPUMCTX structure.
5176 */
5177 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5178#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5179 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5180#else
5181 RT_NOREF(idxVarReg);
5182#endif
5183 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5184 return off;
5185}
5186
5187
5188#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
5189 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
5190
5191/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
5192DECL_INLINE_THROW(uint32_t)
5193iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
5194{
5195 Assert(iGReg < 16);
5196 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5197 kIemNativeGstRegUse_ForUpdate);
5198 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
5199#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5200 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5201#endif
5202 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5203 return off;
5204}
5205
5206
5207#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5208#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
5209 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
5210
5211/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
5212DECL_INLINE_THROW(uint32_t)
5213iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
5214{
5215 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5216 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
5217 Assert(iGRegLo < 16 && iGRegHi < 16);
5218
5219 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
5220 kIemNativeGstRegUse_ForFullWrite);
5221 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
5222 kIemNativeGstRegUse_ForFullWrite);
5223
5224 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5225 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
5226 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
5227 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
5228
5229 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
5230 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
5231 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
5232 return off;
5233}
5234#endif
5235
5236
5237/*********************************************************************************************************************************
5238* General purpose register manipulation (add, sub). *
5239*********************************************************************************************************************************/
5240
5241#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8AddendConst) \
5242 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8AddendConst)
5243
5244/** Emits code for IEM_MC_ADD_GREG_U16. */
5245DECL_INLINE_THROW(uint32_t)
5246iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
5247{
5248 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5249 kIemNativeGstRegUse_ForUpdate);
5250
5251#ifdef RT_ARCH_AMD64
5252 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5253 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5254 if (idxGstTmpReg >= 8)
5255 pbCodeBuf[off++] = X86_OP_REX_B;
5256 if (uAddend == 1)
5257 {
5258 pbCodeBuf[off++] = 0xff; /* inc */
5259 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5260 }
5261 else
5262 {
5263 pbCodeBuf[off++] = 0x81;
5264 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5265 pbCodeBuf[off++] = uAddend;
5266 pbCodeBuf[off++] = 0;
5267 }
5268
5269#else
5270 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5271 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5272
5273 /* add tmp, gstgrp, uAddend */
5274 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
5275
5276 /* bfi w1, w2, 0, 16 - merges bits 15:0 from idxTmpReg into idxGstTmpReg bits 15:0. */
5277 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5278
5279 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5280#endif
5281
5282 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5283
5284#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5285 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5286#endif
5287
5288 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5289 return off;
5290}
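/* Illustrative example (not from the original source): IEM_MC_ADD_GREG_U16 with the
 * guest SI copy in host rsi adds within the low 16 bits only. On AMD64 an addend of
 * 1 becomes "66 ff c6" (inc si) and e.g. 2 becomes "66 81 c6 02 00" (add si, 2), the
 * operand-size prefix keeping bits 63:16 intact. On ARM64 the sum is computed into a
 * temporary and then merged back with bfi, so the upper bits are likewise preserved. */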
5291
5292
5293#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
5294 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5295
5296#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
5297 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5298
5299/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
5300DECL_INLINE_THROW(uint32_t)
5301iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
5302{
5303 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5304 kIemNativeGstRegUse_ForUpdate);
5305
5306#ifdef RT_ARCH_AMD64
5307 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5308 if (f64Bit)
5309 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5310 else if (idxGstTmpReg >= 8)
5311 pbCodeBuf[off++] = X86_OP_REX_B;
5312 if (uAddend == 1)
5313 {
5314 pbCodeBuf[off++] = 0xff; /* inc */
5315 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5316 }
5317 else if (uAddend < 128)
5318 {
5319 pbCodeBuf[off++] = 0x83; /* add */
5320 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5321 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5322 }
5323 else
5324 {
5325 pbCodeBuf[off++] = 0x81; /* add */
5326 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5327 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5328 pbCodeBuf[off++] = 0;
5329 pbCodeBuf[off++] = 0;
5330 pbCodeBuf[off++] = 0;
5331 }
5332
5333#else
5334 /* add gstgrp, gstgrp, uAddend */
5335 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5336 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
5337
5338#endif
5339
5340 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5341
5342#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5343 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5344#endif
5345
5346 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5347 return off;
5348}
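/* Illustrative example (not from the original source): with the guest RDI copy in
 * host rdi, IEM_MC_ADD_GREG_U64 picks the shortest encoding that fits the addend:
 *      48 ff c7                inc rdi                 (addend 1)
 *      48 83 c7 10             add rdi, 16             (addend < 128, imm8 form)
 *      48 81 c7 c8 00 00 00    add rdi, 200            (larger addend, imm32 form)
 * The 32-bit variant drops the REX.W prefix, which also zero-extends the result into
 * the upper half of the register. */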
5349
5350
5351
5352#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5353 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5354
5355/** Emits code for IEM_MC_SUB_GREG_U16. */
5356DECL_INLINE_THROW(uint32_t)
5357iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5358{
5359 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5360 kIemNativeGstRegUse_ForUpdate);
5361
5362#ifdef RT_ARCH_AMD64
5363 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5364 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5365 if (idxGstTmpReg >= 8)
5366 pbCodeBuf[off++] = X86_OP_REX_B;
5367 if (uSubtrahend == 1)
5368 {
5369 pbCodeBuf[off++] = 0xff; /* dec */
5370 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5371 }
5372 else
5373 {
5374 pbCodeBuf[off++] = 0x81;
5375 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5376 pbCodeBuf[off++] = uSubtrahend;
5377 pbCodeBuf[off++] = 0;
5378 }
5379
5380#else
5381 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5382 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5383
5384 /* sub tmp, gstgrp, uSubtrahend */
5385 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5386
5387 /* bfi w1, w2, 0, 16 - merges bits 15:0 from idxTmpReg into idxGstTmpReg bits 15:0. */
5388 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5389
5390 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5391#endif
5392
5393 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5394
5395#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5396 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5397#endif
5398
5399 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5400 return off;
5401}
5402
5403
5404#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5405 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5406
5407#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5408 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5409
5410/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5411DECL_INLINE_THROW(uint32_t)
5412iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5413{
5414 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5415 kIemNativeGstRegUse_ForUpdate);
5416
5417#ifdef RT_ARCH_AMD64
5418 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5419 if (f64Bit)
5420 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5421 else if (idxGstTmpReg >= 8)
5422 pbCodeBuf[off++] = X86_OP_REX_B;
5423 if (uSubtrahend == 1)
5424 {
5425 pbCodeBuf[off++] = 0xff; /* dec */
5426 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5427 }
5428 else if (uSubtrahend < 128)
5429 {
5430 pbCodeBuf[off++] = 0x83; /* sub */
5431 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5432 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5433 }
5434 else
5435 {
5436 pbCodeBuf[off++] = 0x81; /* sub */
5437 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5438 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5439 pbCodeBuf[off++] = 0;
5440 pbCodeBuf[off++] = 0;
5441 pbCodeBuf[off++] = 0;
5442 }
5443
5444#else
5445 /* sub gstgrp, gstgrp, uSubtrahend */
5446 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5447 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
5448
5449#endif
5450
5451 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5452
5453#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5454 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5455#endif
5456
5457 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5458 return off;
5459}
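/* Illustrative example (not from the original source): the subtraction mirrors the
 * add above, e.g. with the guest RBX copy in host rbx:
 *      48 ff cb                dec rbx                 (subtrahend 1)
 *      48 83 eb 08             sub rbx, 8              (subtrahend < 128, imm8 form)
 *      48 81 eb 82 00 00 00    sub rbx, 130            (larger subtrahend, imm32 form)
 */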
5460
5461
5462#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
5463 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5464
5465#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
5466 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5467
5468#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
5469 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5470
5471#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
5472 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5473
5474/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
5475DECL_INLINE_THROW(uint32_t)
5476iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5477{
5478#ifdef VBOX_STRICT
5479 switch (cbMask)
5480 {
5481 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5482 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5483 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5484 case sizeof(uint64_t): break;
5485 default: AssertFailedBreak();
5486 }
5487#endif
5488
5489 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5490 kIemNativeGstRegUse_ForUpdate);
5491
5492 switch (cbMask)
5493 {
5494 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5495 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
5496 break;
5497 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
5498 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
5499 break;
5500 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5501 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5502 break;
5503 case sizeof(uint64_t):
5504 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
5505 break;
5506 default: AssertFailedBreak();
5507 }
5508
5509 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5510
5511#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5512 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5513#endif
5514
5515 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5516 return off;
5517}
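/* Illustrative example (not from the original source): the sub-32-bit cases widen the
 * mask so only the addressed part of the register is affected. IEM_MC_AND_GREG_U16
 * with a mask of 0xfff0 therefore ANDs the full 64-bit guest copy with
 * UINT64_C(0xfffffffffffffff0), clearing only bits 3:0, whereas the 32-bit case ANDs
 * the 32-bit register and thus also zeroes bits 63:32, matching x86 semantics. */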
5518
5519
5520#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
5521 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5522
5523#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
5524 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5525
5526#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
5527 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5528
5529#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
5530 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5531
5532/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
5533DECL_INLINE_THROW(uint32_t)
5534iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5535{
5536#ifdef VBOX_STRICT
5537 switch (cbMask)
5538 {
5539 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5540 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5541 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5542 case sizeof(uint64_t): break;
5543 default: AssertFailedBreak();
5544 }
5545#endif
5546
5547 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5548 kIemNativeGstRegUse_ForUpdate);
5549
5550 switch (cbMask)
5551 {
5552 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5553 case sizeof(uint16_t):
5554 case sizeof(uint64_t):
5555 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
5556 break;
5557 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5558 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5559 break;
5560 default: AssertFailedBreak();
5561 }
5562
5563 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5564
5565#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5566 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5567#endif
5568
5569 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5570 return off;
5571}
5572
5573
5574/*********************************************************************************************************************************
5575* Local/Argument variable manipulation (add, sub, and, or). *
5576*********************************************************************************************************************************/
5577
5578#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
5579 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5580
5581#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
5582 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5583
5584#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
5585 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5586
5587#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
5588 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5589
5590
5591#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
5592 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
5593
5594#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
5595 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
5596
5597#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
5598 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
5599
5600/** Emits code for AND'ing a local and a constant value. */
5601DECL_INLINE_THROW(uint32_t)
5602iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5603{
5604#ifdef VBOX_STRICT
5605 switch (cbMask)
5606 {
5607 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5608 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5609 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5610 case sizeof(uint64_t): break;
5611 default: AssertFailedBreak();
5612 }
5613#endif
5614
5615 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5616 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5617
5618 if (cbMask <= sizeof(uint32_t))
5619 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
5620 else
5621 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
5622
5623 iemNativeVarRegisterRelease(pReNative, idxVar);
5624 return off;
5625}
5626
5627
5628#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
5629 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5630
5631#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
5632 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5633
5634#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
5635 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5636
5637#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
5638 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5639
5640/** Emits code for OR'ing a local and a constant value. */
5641DECL_INLINE_THROW(uint32_t)
5642iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5643{
5644#ifdef VBOX_STRICT
5645 switch (cbMask)
5646 {
5647 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5648 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5649 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5650 case sizeof(uint64_t): break;
5651 default: AssertFailedBreak();
5652 }
5653#endif
5654
5655 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5656 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5657
5658 if (cbMask <= sizeof(uint32_t))
5659 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
5660 else
5661 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
5662
5663 iemNativeVarRegisterRelease(pReNative, idxVar);
5664 return off;
5665}
5666
5667
5668#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
5669 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
5670
5671#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
5672 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
5673
5674#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
5675 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
5676
5677/** Emits code for reversing the byte order in a local value. */
5678DECL_INLINE_THROW(uint32_t)
5679iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
5680{
5681 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5682 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5683
5684 switch (cbLocal)
5685 {
5686 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
5687 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
5688 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
5689 default: AssertFailedBreak();
5690 }
5691
5692 iemNativeVarRegisterRelease(pReNative, idxVar);
5693 return off;
5694}
5695
5696
5697#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
5698 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5699
5700#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
5701 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5702
5703#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
5704 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5705
5706/** Emits code for shifting left a local value. */
5707DECL_INLINE_THROW(uint32_t)
5708iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5709{
5710#ifdef VBOX_STRICT
5711 switch (cbLocal)
5712 {
5713 case sizeof(uint8_t): Assert(cShift < 8); break;
5714 case sizeof(uint16_t): Assert(cShift < 16); break;
5715 case sizeof(uint32_t): Assert(cShift < 32); break;
5716 case sizeof(uint64_t): Assert(cShift < 64); break;
5717 default: AssertFailedBreak();
5718 }
5719#endif
5720
5721 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5722 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5723
5724 if (cbLocal <= sizeof(uint32_t))
5725 {
5726 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
5727 if (cbLocal < sizeof(uint32_t))
5728 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
5729 cbLocal == sizeof(uint16_t)
5730 ? UINT32_C(0xffff)
5731 : UINT32_C(0xff));
5732 }
5733 else
5734 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
5735
5736 iemNativeVarRegisterRelease(pReNative, idxVar);
5737 return off;
5738}
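/* Illustrative example (not from the original source): a 16-bit local is kept in a
 * host GPR, so shifting it left can spill into bits 31:16; the masking above throws
 * those bits away again. E.g. a 16-bit local holding 0x9001 shifted left by 4 first
 * yields 0x00090010 in the host register and is then masked with 0xffff down to the
 * architecturally correct 0x0010. */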
5739
5740
5741#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
5742 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5743
5744#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
5745 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5746
5747#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
5748 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5749
5750/** Emits code for arithmetically shifting a local value right. */
5751DECL_INLINE_THROW(uint32_t)
5752iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5753{
5754#ifdef VBOX_STRICT
5755 switch (cbLocal)
5756 {
5757 case sizeof(int8_t): Assert(cShift < 8); break;
5758 case sizeof(int16_t): Assert(cShift < 16); break;
5759 case sizeof(int32_t): Assert(cShift < 32); break;
5760 case sizeof(int64_t): Assert(cShift < 64); break;
5761 default: AssertFailedBreak();
5762 }
5763#endif
5764
5765 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5766 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5767
5768 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
5769 if (cbLocal == sizeof(uint8_t))
5770 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
5771 else if (cbLocal == sizeof(uint16_t))
5772 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
5773
5774 if (cbLocal <= sizeof(uint32_t))
5775 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
5776 else
5777 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
5778
5779 iemNativeVarRegisterRelease(pReNative, idxVar);
5780 return off;
5781}
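/* Illustrative example (not from the original source): the sign extension above is
 * what makes the arithmetic shift behave like a 16-bit SAR. A 16-bit local holding
 * 0x8000 (i.e. -32768) is first widened to 0xffff8000 in the host register; shifting
 * that right by 4 gives 0xfffff800, whose low 16 bits 0xf800 (-2048) are the expected
 * result. Without the widening the shift would produce 0x0800 instead. */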
5782
5783
5784#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
5785 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
5786
5787#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
5788 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
5789
5790#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
5791 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
5792
5793/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
5794DECL_INLINE_THROW(uint32_t)
5795iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
5796{
5797 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
5798 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
5799 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5800 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5801
5802 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5803 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
5804
5805 /* Need to sign extend the value. */
5806 if (cbLocal <= sizeof(uint32_t))
5807 {
5808/** @todo ARM64: In case of boredom, the extended add instruction can do the
5809 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5810 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5811
5812 switch (cbLocal)
5813 {
5814 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5815 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5816 default: AssertFailed();
5817 }
5818
5819 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5820 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5821 }
5822 else
5823 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5824
5825 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5826 iemNativeVarRegisterRelease(pReNative, idxVar);
5827 return off;
5828}
5829
5830
5831
5832/*********************************************************************************************************************************
5833* EFLAGS *
5834*********************************************************************************************************************************/
5835
5836#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5837# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5838#else
5839# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5840 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5841
5842DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5843{
5844 if (fEflOutput)
5845 {
5846 PVMCPUCC const pVCpu = pReNative->pVCpu;
5847# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5848 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5849 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5850 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5851# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5852 if (fEflOutput & (a_fEfl)) \
5853 { \
5854 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5855 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5856 else \
5857 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5858 } else do { } while (0)
5859# else
5860 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5861 IEMLIVENESSBIT const LivenessClobbered =
5862 {
5863 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5864 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_POTENTIAL_CALL].bm64
5865 | pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5866 | pLivenessEntry->aBits[IEMLIVENESS_BIT_CALL].bm64)
5867 };
5868 IEMLIVENESSBIT const LivenessDelayable =
5869 {
5870 pLivenessEntry->aBits[IEMLIVENESS_BIT_POTENTIAL_CALL].bm64
5871 & pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5872 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5873 | pLivenessEntry->aBits[IEMLIVENESS_BIT_CALL].bm64)
5874 };
5875# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5876 if (fEflOutput & (a_fEfl)) \
5877 { \
5878 if (LivenessClobbered.a_fLivenessMember) \
5879 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5880 else if (LivenessDelayable.a_fLivenessMember) \
5881 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5882 else \
5883 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5884 } else do { } while (0)
5885# endif
5886 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5887 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5888 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5889 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5890 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5891 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5892 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5893# undef CHECK_FLAG_AND_UPDATE_STATS
5894 }
5895 RT_NOREF(fEflInput);
5896}
5897#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5898
5899#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5900#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5901 off = iemNativeEmitFetchEFlags<a_fEflInput, iemNativeEflagsToLivenessMask<a_fEflInput>(),\
5902 a_fEflOutput, iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags)
5903
5904/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5905template<uint32_t const a_fEflInput, uint64_t const a_fLivenessEflInput,
5906 uint32_t const a_fEflOutput, uint64_t const a_fLivenessEflOutput>
5907DECL_INLINE_THROW(uint32_t)
5908iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
5909{
5910 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5911 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5912 /** @todo Fix the NOT instruction so we can re-enable AssertCompile(a_fEflInput != 0 || a_fEflOutput != 0) here. */
5913
5914#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5915# ifdef VBOX_STRICT
5916 if ( pReNative->idxCurCall != 0
5917 && (a_fEflInput != 0 || a_fEflOutput != 0) /* for NOT these are both zero for now. */)
5918 {
5919 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5920 RT_CONSTEXPR uint32_t const fBoth = a_fEflInput | a_fEflOutput;
5921# define ASSERT_ONE_EFL(a_fEflConst, a_idxField) \
5922 AssertMsg( !(fBoth & (a_fEflConst)) \
5923 || (!(a_fEflInput & (a_fEflConst)) \
5924 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5925 : !(a_fEflOutput & (a_fEflConst)) \
5926 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5927 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5928 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5929 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5930 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5931 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5932 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5933 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5934 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5935 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5936# undef ASSERT_ONE_EFL
5937 }
5938# endif
5939#endif
5940
5941 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, a_fEflInput);
5942 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, a_fEflInput);
5943
5944 /** @todo This could be prettier...*/
5945 /** @todo Also, the shadowing+liveness handling of EFlags is currently
5946 * problematic, but I'll try to tackle that soon (@bugref{10720}). */
5947 PCIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarEFlags)];
5948 Assert(pVar->enmKind == kIemNativeVarKind_Invalid || pVar->enmKind == kIemNativeVarKind_Stack);
5949 Assert(pVar->idxReg == UINT8_MAX);
5950 if (pVar->uArgNo >= IEMNATIVE_CALL_ARG_GREG_COUNT)
5951 {
5952 /** @todo We could use kIemNativeGstRegUse_ReadOnly here when fOutput is
5953 * zero, but since iemNativeVarRegisterSet clears the shadowing,
5954 * that's counterproductive... */
5955 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ForUpdate,
5956 a_fLivenessEflInput, a_fLivenessEflOutput);
5957 iemNativeVarRegisterSet(pReNative, idxVarEFlags, idxGstReg, off, true /*fAllocated*/);
5958 }
5959 else
5960 {
5961 /* Register argument variable: Avoid assertions in generic call code and load it the traditional way. */
5962 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
5963 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent(pReNative, &off,
5964 a_fLivenessEflInput, a_fLivenessEflOutput);
5965 if (idxGstReg != UINT8_MAX)
5966 {
5967 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstReg);
5968 iemNativeRegFreeTmp(pReNative, idxGstReg);
5969 }
5970 else
5971 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxVarReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5972 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5973 }
5974 return off;
5975}
5976
5977
5978
5979/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5980 * start using it with custom native code emission (inlining assembly
5981 * instruction helpers). */
5982#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5983#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5984 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5985 off = iemNativeEmitCommitEFlags<true /*fUpdateSkipping*/, a_fEflOutput, \
5986 iemNativeEflagsToLivenessMask<a_fEflInput>(), \
5987 iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags, a_fEflInput)
5988
5989#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5990#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5991 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5992 off = iemNativeEmitCommitEFlags<false /*fUpdateSkipping*/, a_fEflOutput, \
5993 iemNativeEflagsToLivenessMask<a_fEflInput>(), \
5994 iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags, a_fEflInput)
5995
5996/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5997template<bool const a_fUpdateSkipping, uint32_t const a_fEflOutput,
5998 uint64_t const a_fLivenessEflInputBits, uint64_t const a_fLivenessEflOutputBits>
5999DECL_INLINE_THROW(uint32_t)
6000iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflInput)
6001{
6002 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
6003 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
6004
6005#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6006# ifdef VBOX_STRICT
6007 if ( pReNative->idxCurCall != 0
6008 && (a_fLivenessEflInputBits != 0 || a_fLivenessEflOutputBits != 0) /* for NOT these are both zero for now. */)
6009 {
6010 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
6011# define ASSERT_ONE_EFL(a_idxField) \
6012 if RT_CONSTEXPR_IF(((a_fLivenessEflInputBits | a_fLivenessEflOutputBits) & RT_BIT_64(a_idxField)) != 0) \
6013 AssertMsg(!(a_fLivenessEflInputBits & RT_BIT_64(a_idxField)) \
6014 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
6015 : !(a_fLivenessEflOutputBits & RT_BIT_64(a_idxField)) \
6016 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
6017 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)), \
6018 ("%s - %u\n", #a_idxField, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
6019 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
6020 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
6021 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
6022 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
6023 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
6024 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
6025 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
6026# undef ASSERT_ONE_EFL
6027 }
6028# endif
6029#endif
6030
6031#ifdef VBOX_STRICT
6032 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
6033 uint32_t offFixup = off;
6034 off = iemNativeEmitJnzToFixed(pReNative, off, off);
6035 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
6036 iemNativeFixupFixedJump(pReNative, offFixup, off);
6037
6038 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
6039 offFixup = off;
6040 off = iemNativeEmitJzToFixed(pReNative, off, off);
6041 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
6042 iemNativeFixupFixedJump(pReNative, offFixup, off);
6043
6044 /** @todo validate that only bits in the a_fEflOutput mask changed. */
6045#endif
6046
6047#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6048 if RT_CONSTEXPR_IF(a_fUpdateSkipping)
6049 {
6050 Assert(!(pReNative->fSkippingEFlags & fEflInput)); RT_NOREF(fEflInput);
6051 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6052 pReNative->fSkippingEFlags = 0;
6053 else
6054 pReNative->fSkippingEFlags &= ~(a_fEflOutput & X86_EFL_STATUS_BITS);
6055# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6056 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6057 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6058 else
6059 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(a_fEflOutput & X86_EFL_STATUS_BITS),
6060 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6061# endif
6062 }
6063#endif
6064
6065 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
6066 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
6067 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
6068 return off;
6069}
6070
6071
6072typedef enum IEMNATIVEMITEFLOP
6073{
6074 kIemNativeEmitEflOp_Set,
6075 kIemNativeEmitEflOp_Clear,
6076 kIemNativeEmitEflOp_Flip
6077} IEMNATIVEMITEFLOP;
6078
6079#define IEM_MC_SET_EFL_BIT(a_fBit) \
6080 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Set, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6081
6082#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
6083 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Clear, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6084
6085#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
6086 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Flip, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6087
6088/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
6089template<IEMNATIVEMITEFLOP const a_enmOp, uint32_t const a_fEflBit, uint64_t const a_fLivenessEflBit>
6090DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6091{
6092 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ForUpdate,
6093 a_enmOp == kIemNativeEmitEflOp_Flip ? a_fLivenessEflBit : 0,
6094 a_fLivenessEflBit);
6095
6096 /* Using 'if constexpr' forces code elimination in debug builds with VC. */
6097 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Set)
6098 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, a_fEflBit);
6099 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Clear)
6100 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~a_fEflBit);
6101 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Flip)
6102 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, a_fEflBit);
6103 else
6104 AssertCompile( a_enmOp == kIemNativeEmitEflOp_Set /* AssertCompile(false) works with VC 2019 but not clang 15. */
6105 || a_enmOp == kIemNativeEmitEflOp_Clear
6106 || a_enmOp == kIemNativeEmitEflOp_Flip);
6107
6108 /** @todo No delayed writeback for EFLAGS right now. */
6109 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
6110
6111 /* Free but don't flush the EFLAGS register. */
6112 iemNativeRegFreeTmp(pReNative, idxEflReg);
6113
6114 return off;
6115}
6116
6117
6118/*********************************************************************************************************************************
6119* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
6120*********************************************************************************************************************************/
6121
6122#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
6123 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
6124
6125#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
6126 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
6127
6128#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
6129 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
6130
6131
6132/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
6133 * IEM_MC_FETCH_SREG_ZX_U64. */
6134DECL_INLINE_THROW(uint32_t)
6135iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
6136{
6137 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6138 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
6139 Assert(iSReg < X86_SREG_COUNT);
6140
6141 /*
6142 * For now, we will not create a shadow copy of a selector. The rationale
6143 * is that since we do not recompile the popping and loading of segment
6144 * registers, and since the IEM_MC_FETCH_SREG_U* MCs are only used for
6145 * pushing and moving to registers, there is only a small chance that the
6146 * shadow copy will be accessed again before the register is reloaded. One
6147 * scenario would be nested calls in 16-bit code, but I doubt it's worth
6148 * the extra register pressure atm.
6149 *
6150 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
6151 * and iemNativeVarRegisterAcquire for a load scenario. Only the store
6152 * scenario is covered at present (r160730).
6153 */
6154 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6155 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6156 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
6157 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6158 return off;
6159}
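
/*
 * Illustrative sketch only (not compiled): all three IEM_MC_FETCH_SREG_* variants
 * above load the same 16-bit selector value; the ZX forms simply leave the upper
 * destination bits zero, which the 16-bit load already guarantees.  Hypothetical
 * helper name, for clarity only.
 */
#if 0
static uint64_t exampleFetchSRegZx(uint16_t uSel)
{
    return uSel; /* implicit zero extension to 32/64 bits */
}
#endif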
6160
6161
6162
6163/*********************************************************************************************************************************
6164* Register references. *
6165*********************************************************************************************************************************/
6166
6167#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
6168 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
6169
6170#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
6171 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
6172
6173/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
6174DECL_INLINE_THROW(uint32_t)
6175iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
6176{
6177 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
6178 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6179 Assert(iGRegEx < 20);
6180
6181 if (iGRegEx < 16)
6182 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6183 else
6184 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
6185
6186 /* If we've delayed writing back the register value, flush it now. */
6187 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6188
6189 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6190 if (!fConst)
6191 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
6192
6193 return off;
6194}
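
/*
 * Illustrative sketch only (not compiled): how the extended 8-bit register index
 * used above is encoded.  Indices 0..15 name the low byte of GPR0..15 (AL..R15L),
 * while 16..19 name the legacy high-byte registers AH/CH/DH/BH, i.e. bits 8..15
 * of GPR0..3 - hence the iGRegEx & 15 masking.  Hypothetical helper name.
 */
#if 0
static uint8_t exampleFetchGReg8Ex(uint64_t const *pauGprs /* 16 entries */, uint8_t iGRegEx)
{
    if (iGRegEx < 16)
        return (uint8_t)pauGprs[iGRegEx];          /* AL, CL, DL, BL, SPL, ..., R15L */
    return (uint8_t)(pauGprs[iGRegEx & 15] >> 8);  /* AH, CH, DH, BH */
}
#endif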
6195
6196#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
6197 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
6198
6199#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
6200 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
6201
6202#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
6203 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
6204
6205#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
6206 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
6207
6208#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
6209 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
6210
6211#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
6212 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
6213
6214#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
6215 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
6216
6217#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
6218 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
6219
6220#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
6221 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
6222
6223#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
6224 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
6225
6226/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
6227DECL_INLINE_THROW(uint32_t)
6228iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
6229{
6230 Assert(iGReg < 16);
6231 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
6232 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6233
6234 /* If we've delayed writing back the register value, flush it now. */
6235 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
6236
6237 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6238 if (!fConst)
6239 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
6240
6241 return off;
6242}
6243
6244
6245#undef IEM_MC_REF_EFLAGS /* should not be used. */
6246#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
6247 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
6248 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
6249
6250/** Handles IEM_MC_REF_EFLAGS. */
6251DECL_INLINE_THROW(uint32_t)
6252iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
6253{
6254 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
6255 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6256
6257#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6258 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fEflInput);
6259 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
6260 pReNative->fSkippingEFlags &= ~fEflOutput;
6261# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6262
6263 /* Updating the skipping according to the outputs is a little early, but
6264 we don't have any other hooks for references atm. */
6265 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6266 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6267 else if (fEflOutput & X86_EFL_STATUS_BITS)
6268 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
6269 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6270# endif
6271#endif
6272 RT_NOREF(fEflInput, fEflOutput);
6273
6274 /* If we've delayed writing back the register value, flush it now. */
6275 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
6276
6277 /* If there is a shadow copy of guest EFLAGS, flush it now. */
6278 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
6279
6280 return off;
6281}
6282
6283
6284/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
6285 * different code from the threaded recompiler, maybe it would be helpful. For now
6286 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
6287#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
6288
6289
6290#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
6291 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
6292
6293#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
6294 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
6295
6296#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
6297 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
6298
6299#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
6300 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
6301
6302#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6303/* Just being paranoid here. */
6304# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
6305AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
6306AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
6307AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
6308AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
6309# endif
6310AssertCompileMemberOffset(X86XMMREG, au64, 0);
6311AssertCompileMemberOffset(X86XMMREG, au32, 0);
6312AssertCompileMemberOffset(X86XMMREG, ar64, 0);
6313AssertCompileMemberOffset(X86XMMREG, ar32, 0);
6314
6315# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
6316 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
6317# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
6318 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
6319# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
6320 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
6321# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
6322 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
6323#endif
6324
6325/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
6326DECL_INLINE_THROW(uint32_t)
6327iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
6328{
6329 Assert(iXReg < 16);
6330 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
6331 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6332
6333 /* If we've delayed writing back the register value, flush it now. */
6334 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
6335
6336#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6337 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6338 if (!fConst)
6339 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
6340#else
6341 RT_NOREF(fConst);
6342#endif
6343
6344 return off;
6345}
6346
6347
6348
6349/*********************************************************************************************************************************
6350* Effective Address Calculation *
6351*********************************************************************************************************************************/
6352#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
6353 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
6354
6355/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
6356 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
6357DECL_INLINE_THROW(uint32_t)
6358iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6359 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
6360{
6361 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6362
6363 /*
6364 * Handle the disp16 form with no registers first.
6365 *
6366 * Convert to an immediate value, as that'll delay the register allocation
6367 * and assignment till the memory access / call / whatever and we can use
6368 * a more appropriate register (or none at all).
6369 */
6370 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
6371 {
6372 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
6373 return off;
6374 }
6375
6376 /* Determine the displacement. */
6377 uint16_t u16EffAddr;
6378 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6379 {
6380 case 0: u16EffAddr = 0; break;
6381 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
6382 case 2: u16EffAddr = u16Disp; break;
6383 default: AssertFailedStmt(u16EffAddr = 0);
6384 }
6385
6386 /* Determine the registers involved. */
6387 uint8_t idxGstRegBase;
6388 uint8_t idxGstRegIndex;
6389 switch (bRm & X86_MODRM_RM_MASK)
6390 {
6391 case 0:
6392 idxGstRegBase = X86_GREG_xBX;
6393 idxGstRegIndex = X86_GREG_xSI;
6394 break;
6395 case 1:
6396 idxGstRegBase = X86_GREG_xBX;
6397 idxGstRegIndex = X86_GREG_xDI;
6398 break;
6399 case 2:
6400 idxGstRegBase = X86_GREG_xBP;
6401 idxGstRegIndex = X86_GREG_xSI;
6402 break;
6403 case 3:
6404 idxGstRegBase = X86_GREG_xBP;
6405 idxGstRegIndex = X86_GREG_xDI;
6406 break;
6407 case 4:
6408 idxGstRegBase = X86_GREG_xSI;
6409 idxGstRegIndex = UINT8_MAX;
6410 break;
6411 case 5:
6412 idxGstRegBase = X86_GREG_xDI;
6413 idxGstRegIndex = UINT8_MAX;
6414 break;
6415 case 6:
6416 idxGstRegBase = X86_GREG_xBP;
6417 idxGstRegIndex = UINT8_MAX;
6418 break;
6419#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
6420 default:
6421#endif
6422 case 7:
6423 idxGstRegBase = X86_GREG_xBX;
6424 idxGstRegIndex = UINT8_MAX;
6425 break;
6426 }
6427
6428 /*
6429 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
6430 */
6431 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6432 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6433 kIemNativeGstRegUse_ReadOnly);
6434 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
6435 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6436 kIemNativeGstRegUse_ReadOnly)
6437 : UINT8_MAX;
6438#ifdef RT_ARCH_AMD64
6439 if (idxRegIndex == UINT8_MAX)
6440 {
6441 if (u16EffAddr == 0)
6442 {
6443 /* movzx ret, base */
6444 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
6445 }
6446 else
6447 {
6448 /* lea ret32, [base64 + disp32] */
6449 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6450 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6451 if (idxRegRet >= 8 || idxRegBase >= 8)
6452 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6453 pbCodeBuf[off++] = 0x8d;
6454 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6455 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
6456 else
6457 {
6458 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
6459 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6460 }
6461 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6462 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6463 pbCodeBuf[off++] = 0;
6464 pbCodeBuf[off++] = 0;
6465 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6466
6467 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6468 }
6469 }
6470 else
6471 {
6472 /* lea ret32, [index64 + base64 (+ disp32)] */
6473 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6474 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6475 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6476 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6477 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6478 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6479 pbCodeBuf[off++] = 0x8d;
6480 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
6481 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6482 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
6483 if (bMod == X86_MOD_MEM4)
6484 {
6485 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6486 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6487 pbCodeBuf[off++] = 0;
6488 pbCodeBuf[off++] = 0;
6489 }
6490 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6491 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6492 }
6493
6494#elif defined(RT_ARCH_ARM64)
6495 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6496 if (u16EffAddr == 0)
6497 {
6498 if (idxRegIndex == UINT8_MAX)
6499 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
6500 else
6501 {
6502 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
6503 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6504 }
6505 }
6506 else
6507 {
6508 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
6509 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
6510 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
6511 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6512 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
6513 else
6514 {
6515 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
6516 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6517 }
6518 if (idxRegIndex != UINT8_MAX)
6519 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
6520 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6521 }
6522
6523#else
6524# error "port me"
6525#endif
6526
6527 if (idxRegIndex != UINT8_MAX)
6528 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6529 iemNativeRegFreeTmp(pReNative, idxRegBase);
6530 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6531 return off;
6532}
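
/*
 * Illustrative sketch only (not compiled): the value the code emitted above
 * computes at runtime - base plus optional index plus displacement, truncated
 * to 16 bits (the movzx/uxth at the end).  Hypothetical helper name.
 */
#if 0
static uint16_t exampleEffAddr16(uint16_t uBase, uint16_t uIndex, uint16_t uDisp)
{
    return (uint16_t)(uBase + uIndex + uDisp); /* wraps at 64 KiB like real 16-bit addressing */
}
#endif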
6533
6534
6535#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
6536 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
6537
6538/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
6539 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
6540DECL_INLINE_THROW(uint32_t)
6541iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6542 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
6543{
6544 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6545
6546 /*
6547 * Handle the disp32 form with no registers first.
6548 *
6549 * Convert to an immediate value, as that'll delay the register allocation
6550 * and assignment till the memory access / call / whatever and we can use
6551 * a more appropriate register (or none at all).
6552 */
6553 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6554 {
6555 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
6556 return off;
6557 }
6558
6559 /* Calculate the fixed displacement (more on this below in the SIB.B=4 and SIB.B=5 handling). */
6560 uint32_t u32EffAddr = 0;
6561 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6562 {
6563 case 0: break;
6564 case 1: u32EffAddr = (int8_t)u32Disp; break;
6565 case 2: u32EffAddr = u32Disp; break;
6566 default: AssertFailed();
6567 }
6568
6569 /* Get the register (or SIB) value. */
6570 uint8_t idxGstRegBase = UINT8_MAX;
6571 uint8_t idxGstRegIndex = UINT8_MAX;
6572 uint8_t cShiftIndex = 0;
6573 switch (bRm & X86_MODRM_RM_MASK)
6574 {
6575 case 0: idxGstRegBase = X86_GREG_xAX; break;
6576 case 1: idxGstRegBase = X86_GREG_xCX; break;
6577 case 2: idxGstRegBase = X86_GREG_xDX; break;
6578 case 3: idxGstRegBase = X86_GREG_xBX; break;
6579 case 4: /* SIB */
6580 {
6581 /* index w/ scaling. */
6582 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6583 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6584 {
6585 case 0: idxGstRegIndex = X86_GREG_xAX; break;
6586 case 1: idxGstRegIndex = X86_GREG_xCX; break;
6587 case 2: idxGstRegIndex = X86_GREG_xDX; break;
6588 case 3: idxGstRegIndex = X86_GREG_xBX; break;
6589 case 4: cShiftIndex = 0; /*no index*/ break;
6590 case 5: idxGstRegIndex = X86_GREG_xBP; break;
6591 case 6: idxGstRegIndex = X86_GREG_xSI; break;
6592 case 7: idxGstRegIndex = X86_GREG_xDI; break;
6593 }
6594
6595 /* base */
6596 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
6597 {
6598 case 0: idxGstRegBase = X86_GREG_xAX; break;
6599 case 1: idxGstRegBase = X86_GREG_xCX; break;
6600 case 2: idxGstRegBase = X86_GREG_xDX; break;
6601 case 3: idxGstRegBase = X86_GREG_xBX; break;
6602 case 4:
6603 idxGstRegBase = X86_GREG_xSP;
6604 u32EffAddr += uSibAndRspOffset >> 8;
6605 break;
6606 case 5:
6607 if ((bRm & X86_MODRM_MOD_MASK) != 0)
6608 idxGstRegBase = X86_GREG_xBP;
6609 else
6610 {
6611 Assert(u32EffAddr == 0);
6612 u32EffAddr = u32Disp;
6613 }
6614 break;
6615 case 6: idxGstRegBase = X86_GREG_xSI; break;
6616 case 7: idxGstRegBase = X86_GREG_xDI; break;
6617 }
6618 break;
6619 }
6620 case 5: idxGstRegBase = X86_GREG_xBP; break;
6621 case 6: idxGstRegBase = X86_GREG_xSI; break;
6622 case 7: idxGstRegBase = X86_GREG_xDI; break;
6623 }
6624
6625 /*
6626 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6627 * the start of the function.
6628 */
6629 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6630 {
6631 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
6632 return off;
6633 }
6634
6635 /*
6636 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6637 */
6638 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6639 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6640 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6641 kIemNativeGstRegUse_ReadOnly);
6642 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6643 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6644 kIemNativeGstRegUse_ReadOnly);
6645
6646 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6647 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6648 {
6649 idxRegBase = idxRegIndex;
6650 idxRegIndex = UINT8_MAX;
6651 }
6652
6653#ifdef RT_ARCH_AMD64
6654 if (idxRegIndex == UINT8_MAX)
6655 {
6656 if (u32EffAddr == 0)
6657 {
6658 /* mov ret, base */
6659 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6660 }
6661 else
6662 {
6663 /* lea ret32, [base64 + disp32] */
6664 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6665 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6666 if (idxRegRet >= 8 || idxRegBase >= 8)
6667 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6668 pbCodeBuf[off++] = 0x8d;
6669 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6670 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6671 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6672 else
6673 {
6674 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6675 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6676 }
6677 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6678 if (bMod == X86_MOD_MEM4)
6679 {
6680 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6681 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6682 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6683 }
6684 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6685 }
6686 }
6687 else
6688 {
6689 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6690 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6691 if (idxRegBase == UINT8_MAX)
6692 {
6693 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
6694 if (idxRegRet >= 8 || idxRegIndex >= 8)
6695 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6696 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6697 pbCodeBuf[off++] = 0x8d;
6698 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6699 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6700 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6701 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6702 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6703 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6704 }
6705 else
6706 {
6707 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6708 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6709 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6710 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6711 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6712 pbCodeBuf[off++] = 0x8d;
6713 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6714 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6715 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6716 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6717 if (bMod != X86_MOD_MEM0)
6718 {
6719 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6720 if (bMod == X86_MOD_MEM4)
6721 {
6722 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6723 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6724 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6725 }
6726 }
6727 }
6728 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6729 }
6730
6731#elif defined(RT_ARCH_ARM64)
6732 if (u32EffAddr == 0)
6733 {
6734 if (idxRegIndex == UINT8_MAX)
6735 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6736 else if (idxRegBase == UINT8_MAX)
6737 {
6738 if (cShiftIndex == 0)
6739 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
6740 else
6741 {
6742 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6743 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
6744 }
6745 }
6746 else
6747 {
6748 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6749 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6750 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6751 }
6752 }
6753 else
6754 {
6755 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
6756 {
6757 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6758 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
6759 }
6760 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
6761 {
6762 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6763 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6764 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
6765 }
6766 else
6767 {
6768 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
6769 if (idxRegBase != UINT8_MAX)
6770 {
6771 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6772 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6773 }
6774 }
6775 if (idxRegIndex != UINT8_MAX)
6776 {
6777 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6778 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6779 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6780 }
6781 }
6782
6783#else
6784# error "port me"
6785#endif
6786
6787 if (idxRegIndex != UINT8_MAX)
6788 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6789 if (idxRegBase != UINT8_MAX)
6790 iemNativeRegFreeTmp(pReNative, idxRegBase);
6791 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6792 return off;
6793}
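
/*
 * Illustrative sketch only (not compiled): the 32-bit result computed by the
 * code emitted above - optional base, optional index scaled by the SIB scale
 * field, and the displacement, all truncated to 32 bits.  Hypothetical helper.
 */
#if 0
static uint32_t exampleEffAddr32(uint32_t uBase, uint32_t uIndex, uint8_t cShiftIndex, uint32_t uDisp)
{
    return uBase + (uIndex << cShiftIndex) + uDisp; /* 32-bit arithmetic wraps naturally */
}
#endif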
6794
6795
6796#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6797 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6798 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6799
6800#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6801 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6802 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6803
6804#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6805 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6806 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
6807
6808/**
6809 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
6810 *
6811 * @returns New off.
6812 * @param pReNative The native recompile state.
6813 * @param off The current code buffer offset.
6814 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
6815 * bit 4 to REX.X. The two bits are part of the
6816 * REG sub-field, which isn't needed in this
6817 * function.
6818 * @param uSibAndRspOffset Two parts:
6819 * - The first 8 bits make up the SIB byte.
6820 * - The next 8 bits are the fixed RSP/ESP offset
6821 * in case of a pop [xSP].
6822 * @param u32Disp The displacement byte/word/dword, if any.
6823 * @param cbInstr The size of the fully decoded instruction. Used
6824 * for RIP relative addressing.
6825 * @param idxVarRet The result variable number.
6826 * @param f64Bit Whether to use a 64-bit or 32-bit address size
6827 * when calculating the address.
6828 *
6829 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
6830 */
6831DECL_INLINE_THROW(uint32_t)
6832iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
6833 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
6834{
6835 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6836
6837 /*
6838 * Special case the rip + disp32 form first.
6839 */
6840 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6841 {
6842 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6843 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6844 kIemNativeGstRegUse_ReadOnly);
6845 if (f64Bit)
6846 {
6847#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6848 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr + (int64_t)pReNative->Core.offPc;
6849#else
6850 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
6851#endif
6852#ifdef RT_ARCH_AMD64
6853 if ((int32_t)offFinalDisp == offFinalDisp)
6854 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
6855 else
6856 {
6857 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
6858 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
6859 }
6860#else
6861 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6862#endif
6863 }
6864 else
6865 {
6866# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6867 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr + (int32_t)pReNative->Core.offPc;
6868# else
6869 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr;
6870# endif
6871 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6872 }
6873 iemNativeRegFreeTmp(pReNative, idxRegPc);
6874 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6875 return off;
6876 }
6877
6878 /* Calculate the fixed displacement (more on this below in the SIB.B=4 and SIB.B=5 handling). */
6879 int64_t i64EffAddr = 0;
6880 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6881 {
6882 case 0: break;
6883 case 1: i64EffAddr = (int8_t)u32Disp; break;
6884 case 2: i64EffAddr = (int32_t)u32Disp; break;
6885 default: AssertFailed();
6886 }
6887
6888 /* Get the register (or SIB) value. */
6889 uint8_t idxGstRegBase = UINT8_MAX;
6890 uint8_t idxGstRegIndex = UINT8_MAX;
6891 uint8_t cShiftIndex = 0;
6892 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6893 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6894 else /* SIB: */
6895 {
6896 /* index w/ scaling. */
6897 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6898 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6899 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6900 if (idxGstRegIndex == 4)
6901 {
6902 /* no index */
6903 cShiftIndex = 0;
6904 idxGstRegIndex = UINT8_MAX;
6905 }
6906
6907 /* base */
6908 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6909 if (idxGstRegBase == 4)
6910 {
6911 /* pop [rsp] hack */
6912 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6913 }
6914 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6915 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6916 {
6917 /* mod=0 and base=5 -> disp32, no base reg. */
6918 Assert(i64EffAddr == 0);
6919 i64EffAddr = (int32_t)u32Disp;
6920 idxGstRegBase = UINT8_MAX;
6921 }
6922 }
6923
6924 /*
6925 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6926 * the start of the function.
6927 */
6928 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6929 {
6930 if (f64Bit)
6931 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6932 else
6933 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6934 return off;
6935 }
6936
6937 /*
6938 * Now emit code that calculates:
6939 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6940 * or if !f64Bit:
6941 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6942 */
6943 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6944 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6945 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6946 kIemNativeGstRegUse_ReadOnly);
6947 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6948 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6949 kIemNativeGstRegUse_ReadOnly);
6950
6951 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6952 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6953 {
6954 idxRegBase = idxRegIndex;
6955 idxRegIndex = UINT8_MAX;
6956 }
6957
6958#ifdef RT_ARCH_AMD64
6959 uint8_t bFinalAdj;
6960 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6961 bFinalAdj = 0; /* likely */
6962 else
6963 {
6964 /* pop [rsp] with a problematic disp32 value. Split out the
6965 RSP offset and add it separately afterwards (bFinalAdj). */
6966 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6967 Assert(idxGstRegBase == X86_GREG_xSP);
6968 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6969 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6970 Assert(bFinalAdj != 0);
6971 i64EffAddr -= bFinalAdj;
6972 Assert((int32_t)i64EffAddr == i64EffAddr);
6973 }
6974 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6975//pReNative->pInstrBuf[off++] = 0xcc;
6976
6977 if (idxRegIndex == UINT8_MAX)
6978 {
6979 if (u32EffAddr == 0)
6980 {
6981 /* mov ret, base */
6982 if (f64Bit)
6983 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6984 else
6985 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6986 }
6987 else
6988 {
6989 /* lea ret, [base + disp32] */
6990 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6991 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6992 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6993 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6994 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6995 | (f64Bit ? X86_OP_REX_W : 0);
6996 pbCodeBuf[off++] = 0x8d;
6997 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6998 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6999 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
7000 else
7001 {
7002 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7003 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
7004 }
7005 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7006 if (bMod == X86_MOD_MEM4)
7007 {
7008 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7009 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7010 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7011 }
7012 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7013 }
7014 }
7015 else
7016 {
7017 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
7018 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
7019 if (idxRegBase == UINT8_MAX)
7020 {
7021 /* lea ret, [(index64 << cShiftIndex) + disp32] */
7022 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
7023 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7024 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
7025 | (f64Bit ? X86_OP_REX_W : 0);
7026 pbCodeBuf[off++] = 0x8d;
7027 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
7028 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
7029 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7030 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7031 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7032 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7033 }
7034 else
7035 {
7036 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
7037 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
7038 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7039 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
7040 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
7041 | (f64Bit ? X86_OP_REX_W : 0);
7042 pbCodeBuf[off++] = 0x8d;
7043 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
7044 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7045 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7046 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
7047 if (bMod != X86_MOD_MEM0)
7048 {
7049 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7050 if (bMod == X86_MOD_MEM4)
7051 {
7052 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7053 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7054 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7055 }
7056 }
7057 }
7058 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7059 }
7060
7061 if (!bFinalAdj)
7062 { /* likely */ }
7063 else
7064 {
7065 Assert(f64Bit);
7066 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
7067 }
7068
7069#elif defined(RT_ARCH_ARM64)
7070 if (i64EffAddr == 0)
7071 {
7072 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7073 if (idxRegIndex == UINT8_MAX)
7074 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
7075 else if (idxRegBase != UINT8_MAX)
7076 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
7077 f64Bit, false /*fSetFlags*/, cShiftIndex);
7078 else
7079 {
7080 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
7081 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
7082 }
7083 }
7084 else
7085 {
7086 if (f64Bit)
7087 { /* likely */ }
7088 else
7089 i64EffAddr = (int32_t)i64EffAddr;
7090
7091 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
7092 {
7093 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7094 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
7095 }
7096 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
7097 {
7098 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7099 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
7100 }
7101 else
7102 {
7103 if (f64Bit)
7104 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
7105 else
7106 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
7107 if (idxRegBase != UINT8_MAX)
7108 {
7109 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7110 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
7111 }
7112 }
7113 if (idxRegIndex != UINT8_MAX)
7114 {
7115 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7116 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
7117 f64Bit, false /*fSetFlags*/, cShiftIndex);
7118 }
7119 }
7120
7121#else
7122# error "port me"
7123#endif
7124
7125 if (idxRegIndex != UINT8_MAX)
7126 iemNativeRegFreeTmp(pReNative, idxRegIndex);
7127 if (idxRegBase != UINT8_MAX)
7128 iemNativeRegFreeTmp(pReNative, idxRegBase);
7129 iemNativeVarRegisterRelease(pReNative, idxVarRet);
7130 return off;
7131}
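
/*
 * Illustrative sketch only (not compiled): the RIP-relative special case handled
 * at the top of the function above.  The disp32 is relative to the end of the
 * instruction, hence the cbInstr addend (plus any delayed PC offset when
 * IEMNATIVE_WITH_DELAYED_PC_UPDATING is active).  Hypothetical helper name.
 */
#if 0
static uint64_t exampleEffAddrRipRel(uint64_t uRipAtInstrStart, uint8_t cbInstr, uint32_t u32Disp)
{
    return uRipAtInstrStart + cbInstr + (uint64_t)(int64_t)(int32_t)u32Disp; /* sign-extended disp32 */
}
#endif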
7132
7133
7134/*********************************************************************************************************************************
7135* Memory fetches and stores common *
7136*********************************************************************************************************************************/
7137
7138typedef enum IEMNATIVEMITMEMOP
7139{
7140 kIemNativeEmitMemOp_Store = 0,
7141 kIemNativeEmitMemOp_Fetch,
7142 kIemNativeEmitMemOp_Fetch_Zx_U16,
7143 kIemNativeEmitMemOp_Fetch_Zx_U32,
7144 kIemNativeEmitMemOp_Fetch_Zx_U64,
7145 kIemNativeEmitMemOp_Fetch_Sx_U16,
7146 kIemNativeEmitMemOp_Fetch_Sx_U32,
7147 kIemNativeEmitMemOp_Fetch_Sx_U64
7148} IEMNATIVEMITMEMOP;
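
/*
 * Illustrative sketch only (not compiled): what the Zx/Sx fetch variants amount
 * to for an 8-bit memory value widened to 32 bits - zero vs. sign extension.
 * Hypothetical helper names.
 */
#if 0
static uint32_t exampleFetchU8_Zx_U32(uint8_t bValue) { return bValue; }
static uint32_t exampleFetchU8_Sx_U32(uint8_t bValue) { return (uint32_t)(int32_t)(int8_t)bValue; }
#endif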
7149
7150/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
7151 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
7152 * (with iSegReg = UINT8_MAX). */
7153DECL_INLINE_THROW(uint32_t)
7154iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
7155 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAlignMaskAndCtl, IEMNATIVEMITMEMOP enmOp,
7156 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
7157{
7158 /*
7159 * Assert sanity.
7160 */
7161 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7162 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7163 Assert( enmOp != kIemNativeEmitMemOp_Store
7164 || pVarValue->enmKind == kIemNativeVarKind_Immediate
7165 || pVarValue->enmKind == kIemNativeVarKind_Stack);
7166 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7167 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7168 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7169 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7170 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7171 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
7172#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7173 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
7174 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
7175#else
7176 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
7177#endif
7178 Assert(!(fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
7179 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7180#ifdef VBOX_STRICT
7181 if (iSegReg == UINT8_MAX)
7182 {
7183 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7184 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7185 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7186 switch (cbMem)
7187 {
7188 case 1:
7189 Assert( pfnFunction
7190 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
7191 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7192 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7193 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7194 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7195 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
7196 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
7197 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
7198 : UINT64_C(0xc000b000a0009000) ));
7199 Assert(!fAlignMaskAndCtl);
7200 break;
7201 case 2:
7202 Assert( pfnFunction
7203 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
7204 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7205 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7206 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7207 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
7208 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
7209 : UINT64_C(0xc000b000a0009000) ));
7210 Assert(fAlignMaskAndCtl <= 1);
7211 break;
7212 case 4:
7213 Assert( pfnFunction
7214 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
7215 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7216 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7217 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
7218 : UINT64_C(0xc000b000a0009000) ));
7219 Assert(fAlignMaskAndCtl <= 3);
7220 break;
7221 case 8:
7222 Assert( pfnFunction
7223 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
7224 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
7225 : UINT64_C(0xc000b000a0009000) ));
7226 Assert(fAlignMaskAndCtl <= 7);
7227 break;
7228#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7229 case sizeof(RTUINT128U):
7230 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7231 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
7232 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7233 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
7234 || ( enmOp == kIemNativeEmitMemOp_Store
7235 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7236 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
7237 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7238 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7239 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
7240 : fAlignMaskAndCtl <= 15);
7241 break;
7242 case sizeof(RTUINT256U):
7243 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7244 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
7245 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
7246 || ( enmOp == kIemNativeEmitMemOp_Store
7247 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
7248 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
7249 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
7250 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
7251 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
7252 : fAlignMaskAndCtl <= 31);
7253 break;
7254#endif
7255 }
7256 }
7257 else
7258 {
7259 Assert(iSegReg < 6);
7260 switch (cbMem)
7261 {
7262 case 1:
7263 Assert( pfnFunction
7264 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
7265 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
7266 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7267 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7268 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7269 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
7270 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
7271 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
7272 : UINT64_C(0xc000b000a0009000) ));
7273 Assert(!fAlignMaskAndCtl);
7274 break;
7275 case 2:
7276 Assert( pfnFunction
7277 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
7278 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
7279 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7280 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7281 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
7282 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
7283 : UINT64_C(0xc000b000a0009000) ));
7284 Assert(fAlignMaskAndCtl <= 1);
7285 break;
7286 case 4:
7287 Assert( pfnFunction
7288 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
7289 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
7290 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
7291 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
7292 : UINT64_C(0xc000b000a0009000) ));
7293 Assert(fAlignMaskAndCtl <= 3);
7294 break;
7295 case 8:
7296 Assert( pfnFunction
7297 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
7298 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
7299 : UINT64_C(0xc000b000a0009000) ));
7300 Assert(fAlignMaskAndCtl <= 7);
7301 break;
7302#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7303 case sizeof(RTUINT128U):
7304 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7305 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
7306 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7307 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
7308 || ( enmOp == kIemNativeEmitMemOp_Store
7309 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7310 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
7311 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7312 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7313 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
7314 : fAlignMaskAndCtl <= 15);
7315 break;
7316 case sizeof(RTUINT256U):
7317 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7318 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
7319 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
7320 || ( enmOp == kIemNativeEmitMemOp_Store
7321 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
7322 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
7323 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
7324 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
7325 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
7326 : fAlignMaskAndCtl <= 31);
7327 break;
7328#endif
7329 }
7330 }
7331#endif
7332
7333#ifdef VBOX_STRICT
7334 /*
7335 * Check that the fExec flags we've got make sense.
7336 */
7337 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7338#endif
7339
7340 /*
7341 * To keep things simple we have to commit any pending writes first as we
7342 * may end up making calls.
7343 */
7344 /** @todo we could postpone this till we make the call and reload the
7345 * registers after returning from the call. Not sure if that's sensible or
7346 * not, though. */
7347#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7348 off = iemNativeRegFlushPendingWrites(pReNative, off);
7349#else
7350 /* The program counter is treated differently for now. */
7351 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
7352#endif
7353
7354#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7355 /*
7356 * Move/spill/flush stuff out of call-volatile registers.
7357 * This is the easy way out. We could contain this to the tlb-miss branch
7358 * by saving and restoring active stuff here.
7359 */
7360 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7361#endif
7362
7363 /*
7364 * Define labels and allocate the result register (trying for the return
7365 * register if we can).
7366 */
7367 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7368#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7369 uint8_t idxRegValueFetch = UINT8_MAX;
7370
7371 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7372 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7373 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
7374 else
7375 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7376 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7377 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7378 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7379#else
7380 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7381 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7382 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7383 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7384#endif
7385 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
7386
7387#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7388 uint8_t idxRegValueStore = UINT8_MAX;
7389
7390 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7391 idxRegValueStore = !TlbState.fSkip
7392 && enmOp == kIemNativeEmitMemOp_Store
7393 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7394 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7395 : UINT8_MAX;
7396 else
7397 idxRegValueStore = !TlbState.fSkip
7398 && enmOp == kIemNativeEmitMemOp_Store
7399 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7400 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7401 : UINT8_MAX;
7402
7403#else
7404 uint8_t const idxRegValueStore = !TlbState.fSkip
7405 && enmOp == kIemNativeEmitMemOp_Store
7406 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7407 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7408 : UINT8_MAX;
7409#endif
7410 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7411 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7412 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7413 : UINT32_MAX;
7414
7415 /*
7416 * Jump to the TLB lookup code.
7417 */
7418 if (!TlbState.fSkip)
7419 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7420
7421 /*
7422 * TlbMiss:
7423 *
7424 * Call helper to do the fetching.
7425 * We flush all guest register shadow copies here.
7426 */
7427 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7428
7429#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7430 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7431#else
7432 RT_NOREF(idxInstr);
7433#endif
7434
7435#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7436 if (pReNative->Core.offPc)
7437 {
7438 /*
7439 * Update the program counter but restore it at the end of the TlbMiss branch.
7440 * This should allow delaying more program counter updates for the TlbLookup and hit paths,
7441 * which are hopefully much more frequent, reducing the number of memory accesses.
7442 */
7443 /* Allocate a temporary PC register. */
7444/** @todo r=bird: This would technically need to be done up front as it's a register allocation. */
7445 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7446 kIemNativeGstRegUse_ForUpdate);
7447
7448 /* Perform the addition and store the result. */
7449 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7450 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7451# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
7452 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
7453# endif
7454
7455 /* Free and flush the PC register. */
7456 iemNativeRegFreeTmp(pReNative, idxPcReg);
7457 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7458 }
7459#endif
7460
7461#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7462 /* Save variables in volatile registers. */
7463 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7464 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
7465 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
7466 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7467#endif
7468
7469 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
7470 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
7471#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7472 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7473 {
7474 /*
7475 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
7476 *
7477 * @note A host register was assigned to the variable for the TlbLookup case above
7478 * and must not be freed here, or the value will not be synced into that register
7479 * further down the road because the variable doesn't know it has a register assigned.
7480 *
7481 * @note For loads it is not required to sync what is in the assigned register with the stack slot
7482 * as it will be overwritten anyway.
7483 */
7484 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7485 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
7486 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
7487 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7488 }
7489 else
7490#endif
7491 if (enmOp == kIemNativeEmitMemOp_Store)
7492 {
7493 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7494         off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*offAddend*/,
7495#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7496 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7497#else
7498 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7499 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7500#endif
7501 }
7502
7503 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
7504    off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*offAddend*/,
7505#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7506 fVolGregMask);
7507#else
7508 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
7509#endif
7510
7511 if (iSegReg != UINT8_MAX)
7512 {
7513 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
7514 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7515 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
7516 }
7517
7518 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7519 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7520
7521 /* Done setting up parameters, make the call. */
7522 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7523
7524 /*
7525 * Put the result in the right register if this is a fetch.
7526 */
7527 if (enmOp != kIemNativeEmitMemOp_Store)
7528 {
7529#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7530 if ( cbMem == sizeof(RTUINT128U)
7531 || cbMem == sizeof(RTUINT256U))
7532 {
7533 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
7534
7535 /* Sync the value on the stack with the host register assigned to the variable. */
7536 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
7537 }
7538 else
7539#endif
7540 {
7541 Assert(idxRegValueFetch == pVarValue->idxReg);
7542 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
7543 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
7544 }
7545 }
7546
7547#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7548 /* Restore variables and guest shadow registers to volatile registers. */
7549 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7550 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7551#endif
7552
7553#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7554 if (pReNative->Core.offPc)
7555 {
7556 /*
7557 * Time to restore the program counter to its original value.
7558 */
7559 /* Allocate a temporary PC register. */
7560 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7561 kIemNativeGstRegUse_ForUpdate);
7562
7563 /* Restore the original value. */
7564 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7565 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7566
7567 /* Free and flush the PC register. */
7568 iemNativeRegFreeTmp(pReNative, idxPcReg);
7569 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7570 }
7571#endif
7572
7573#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7574 if (!TlbState.fSkip)
7575 {
7576 /* end of TlbMiss - Jump to the done label. */
7577 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7578 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7579
7580 /*
7581 * TlbLookup:
7582 */
7583 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl,
7584 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
7585 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
7586
7587 /*
7588 * Emit code to do the actual storing / fetching.
7589 */
7590 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7591# ifdef IEM_WITH_TLB_STATISTICS
7592        off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7593                                                  enmOp == kIemNativeEmitMemOp_Store
7594                                                    ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
7595                                                    : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
7596# endif
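        /* Dispatch on the operation and access size: stores pick a register, immediate or
           SIMD store emitter; fetches pick a plain, zero-extending or sign-extending load
           emitter matching the requested destination width. */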
7597 switch (enmOp)
7598 {
7599 case kIemNativeEmitMemOp_Store:
7600 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
7601 {
7602 switch (cbMem)
7603 {
7604 case 1:
7605 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7606 break;
7607 case 2:
7608 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7609 break;
7610 case 4:
7611 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7612 break;
7613 case 8:
7614 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7615 break;
7616#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7617 case sizeof(RTUINT128U):
7618 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7619 break;
7620 case sizeof(RTUINT256U):
7621 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7622 break;
7623#endif
7624 default:
7625 AssertFailed();
7626 }
7627 }
7628 else
7629 {
7630 switch (cbMem)
7631 {
7632 case 1:
7633 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
7634 idxRegMemResult, TlbState.idxReg1);
7635 break;
7636 case 2:
7637 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7638 idxRegMemResult, TlbState.idxReg1);
7639 break;
7640 case 4:
7641 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7642 idxRegMemResult, TlbState.idxReg1);
7643 break;
7644 case 8:
7645 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
7646 idxRegMemResult, TlbState.idxReg1);
7647 break;
7648 default:
7649 AssertFailed();
7650 }
7651 }
7652 break;
7653
7654 case kIemNativeEmitMemOp_Fetch:
7655 case kIemNativeEmitMemOp_Fetch_Zx_U16:
7656 case kIemNativeEmitMemOp_Fetch_Zx_U32:
7657 case kIemNativeEmitMemOp_Fetch_Zx_U64:
7658 switch (cbMem)
7659 {
7660 case 1:
7661 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7662 break;
7663 case 2:
7664 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7665 break;
7666 case 4:
7667 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7668 break;
7669 case 8:
7670 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7671 break;
7672#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7673 case sizeof(RTUINT128U):
7674 /*
7675 * No need to sync back the register with the stack, this is done by the generic variable handling
7676 * code if there is a register assigned to a variable and the stack must be accessed.
7677 */
7678 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7679 break;
7680 case sizeof(RTUINT256U):
7681 /*
7682 * No need to sync back the register with the stack, this is done by the generic variable handling
7683 * code if there is a register assigned to a variable and the stack must be accessed.
7684 */
7685 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7686 break;
7687#endif
7688 default:
7689 AssertFailed();
7690 }
7691 break;
7692
7693 case kIemNativeEmitMemOp_Fetch_Sx_U16:
7694 Assert(cbMem == 1);
7695 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7696 break;
7697
7698 case kIemNativeEmitMemOp_Fetch_Sx_U32:
7699 Assert(cbMem == 1 || cbMem == 2);
7700 if (cbMem == 1)
7701 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7702 else
7703 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7704 break;
7705
7706 case kIemNativeEmitMemOp_Fetch_Sx_U64:
7707 switch (cbMem)
7708 {
7709 case 1:
7710 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7711 break;
7712 case 2:
7713 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7714 break;
7715 case 4:
7716 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7717 break;
7718 default:
7719 AssertFailed();
7720 }
7721 break;
7722
7723 default:
7724 AssertFailed();
7725 }
7726
7727 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7728
7729 /*
7730 * TlbDone:
7731 */
7732 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7733
7734 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7735
7736# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7737 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7738 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7739# endif
7740 }
7741#else
7742 RT_NOREF(fAlignMaskAndCtl, idxLabelTlbMiss);
7743#endif
7744
7745 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
7746 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7747 return off;
7748}
7749
7750
7751
7752/*********************************************************************************************************************************
7753* Memory fetches (IEM_MEM_FETCH_XXX). *
7754*********************************************************************************************************************************/
7755
7756/* 8-bit segmented: */
7757#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
7758 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
7759 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7760 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7761
7762#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7763 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7764 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7765 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7766
7767#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7768 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7769 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7770 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7771
7772#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7773 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7774 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7775 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7776
7777#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7778 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7779 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7780 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7781
7782#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7783 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7784 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7785 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7786
7787#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7788 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7789 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7790 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7791
7792/* 16-bit segmented: */
7793#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7794 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7795 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7796 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7797
7798#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7799 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7800 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7801 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7802
7803#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7804 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7805 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7806 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7807
7808#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7809 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7810 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7811 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7812
7813#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7814 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7815 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7816 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7817
7818#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7819 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7820 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7821 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7822
7823
7824/* 32-bit segmented: */
7825#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7826 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7827 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7828 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7829
7830#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7831 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7832 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7833 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7834
7835#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7836 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7837 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7838 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7839
7840#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7841 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7842 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7843 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7844
7845#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
7846 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
7847 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7848 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7849
7850#define IEM_MC_FETCH_MEM_I16_DISP(a_i16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7851 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
7852 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7853 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7854
7855#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
7856 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7857 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7858 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7859
7860#define IEM_MC_FETCH_MEM_I32_DISP(a_i32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7861 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7862 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7863 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7864
7865#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
7866 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
7867 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7868 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7869
7870AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
7871#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
7872 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
7873 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7874 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7875
7876
7877/* 64-bit segmented: */
7878#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7879 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7880 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7881 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7882
7883AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
7884#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
7885 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
7886 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7887 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7888
7889
7890/* 8-bit flat: */
7891#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7892 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
7893 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7894 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7895
7896#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7897 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7898 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7899 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7900
7901#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7902 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7903 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7904 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7905
7906#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7907 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7908 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7909 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7910
7911#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7912 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7913 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7914 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7915
7916#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7917 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7918 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7919 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7920
7921#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7922 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7923 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7924 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7925
7926
7927/* 16-bit flat: */
7928#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7929 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7930 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7931 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7932
7933#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7934 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7935 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7936 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7937
7938#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7939 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7940 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7941 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7942
7943#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7944 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7945 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7946 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7947
7948#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7949 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7950 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7951 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7952
7953#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7954 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7955 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7956 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7957
7958/* 32-bit flat: */
7959#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7960 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7961 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7962 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7963
7964#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7965 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7966 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7967 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7968
7969#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7970 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7971 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7972 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7973
7974#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7975 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7976 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7977 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7978
7979#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7980 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7981 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7982 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7983
7984#define IEM_MC_FETCH_MEM_FLAT_I16_DISP(a_i16Dst, a_GCPtrMem, a_offDisp) \
7985 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7986 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7987 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7988
7989#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7990 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7991 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7992 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7993
7994#define IEM_MC_FETCH_MEM_FLAT_I32_DISP(a_i32Dst, a_GCPtrMem, a_offDisp) \
7995 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7996 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7997 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7998
7999#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
8000 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
8001 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
8002 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
8003
8004#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
8005 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
8006 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
8007 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
8008
8009
8010/* 64-bit flat: */
8011#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
8012 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
8013 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
8014 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
8015
8016#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
8017 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
8018 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
8019 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
8020
8021#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8022/* 128-bit segmented: */
8023#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
8024 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
8025 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
8026 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
8027
8028#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
8029 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8030 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8031 kIemNativeEmitMemOp_Fetch, \
8032 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
8033
8034AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
8035#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
8036 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, sizeof(X86XMMREG), \
8037 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8038 kIemNativeEmitMemOp_Fetch, \
8039 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
8040
8041#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
8042 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
8043 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
8044 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
8045
8046#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
8047 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
8048 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
8049 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
8050
8051
8052/* 128-bit flat: */
8053#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
8054 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
8055 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
8056 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
8057
8058#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
8059 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8060 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8061 kIemNativeEmitMemOp_Fetch, \
8062 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
8063
8064#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
8065 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, sizeof(X86XMMREG), \
8066 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8067 kIemNativeEmitMemOp_Fetch, \
8068 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
8069
8070#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
8071 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
8072 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
8073 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
8074
8075#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
8076 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
8077 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
8078 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
8079
8080/* 256-bit segmented: */
8081#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
8082 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
8083 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
8084 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8085
8086#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
8087 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
8088 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
8089 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8090
8091#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
8092 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
8093 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
8094 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8095
8096#define IEM_MC_FETCH_MEM_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
8097 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
8098 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
8099 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8100
8101
8102/* 256-bit flat: */
8103#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
8104 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
8105 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
8106 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8107
8108#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
8109 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
8110 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
8111 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8112
8113#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
8114 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
8115 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
8116 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8117
8118#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
8119 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, \
8120 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
8121 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8122
8123#endif
8124
8125
8126/*********************************************************************************************************************************
8127* Memory stores (IEM_MEM_STORE_XXX). *
8128*********************************************************************************************************************************/
8129
8130#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
8131 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
8132 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
8133 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8134
8135#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
8136 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
8137 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
8138 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8139
8140#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
8141 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
8142 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
8143 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8144
8145#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
8146 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
8147 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
8148 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8149
8150
8151#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
8152 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
8153 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
8154 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8155
8156#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
8157 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
8158 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
8159 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8160
8161#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
8162 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
8163 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
8164 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8165
8166#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
8167 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
8168 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
8169 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8170
8171
8172#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
8173 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8174 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8175
8176#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
8177 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8178 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8179
8180#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
8181 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8182 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8183
8184#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
8185 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8186 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8187
8188
8189#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
8190 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8191 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8192
8193#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
8194 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8195 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8196
8197#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
8198 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8199 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8200
8201#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
8202 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8203 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8204
8205/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
8206 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
8207DECL_INLINE_THROW(uint32_t)
8208iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
8209 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
8210{
8211 /*
8212 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
8213 * to do the grunt work.
8214 */
8215 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
8216 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
8217 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
8218 pfnFunction, idxInstr);
8219 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
8220 return off;
8221}
8222
8223
8224#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8225# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
8226 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8227 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8228 kIemNativeEmitMemOp_Store, \
8229 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
8230
8231# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
8232 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
8233 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
8234 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
8235
8236# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
8237 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
8238 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
8239 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
8240
8241# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
8242 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
8243 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8244 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8245
8246
8247# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
8248 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8249 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8250 kIemNativeEmitMemOp_Store, \
8251 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
8252
8253# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
8254 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
8255 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
8256 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
8257
8258# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
8259 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
8260 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
8261 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
8262
8263# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
8264 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
8265 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8266 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8267#endif
8268
8269
8270
8271/*********************************************************************************************************************************
8272* Stack Accesses. *
8273*********************************************************************************************************************************/
8274/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
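/* Example: IEM_MC_FLAT64_PUSH_U16 below passes RT_MAKE_U32_FROM_U8(16, 64, 0, 0), i.e. a
   16-bit value pushed with a 64-bit flat stack and no segment-register special handling;
   the first byte is the operand width in bits, the second the flat-mode bitness (0 means
   segmented) and the third the fSReg flag. */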
8275#define IEM_MC_PUSH_U16(a_u16Value) \
8276 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
8277 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
8278#define IEM_MC_PUSH_U32(a_u32Value) \
8279 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
8280 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
8281#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
8282 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
8283 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
8284#define IEM_MC_PUSH_U64(a_u64Value) \
8285 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
8286 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
8287
8288#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
8289 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
8290 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8291#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
8292 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
8293 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
8294#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
8295 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
8296 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
8297
8298#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
8299 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
8300 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8301#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
8302 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
8303 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
8304
8305
8306/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
8307DECL_INLINE_THROW(uint32_t)
8308iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
8309 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
8310{
8311 /*
8312 * Assert sanity.
8313 */
8314 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
8315 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
8316#ifdef VBOX_STRICT
8317 if (RT_BYTE2(cBitsVarAndFlat) != 0)
8318 {
8319 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8320 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8321 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8322 Assert( pfnFunction
8323 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8324 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
8325 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
8326 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8327 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
8328 : UINT64_C(0xc000b000a0009000) ));
8329 }
8330 else
8331 Assert( pfnFunction
8332 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
8333 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
8334 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
8335 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
8336 : UINT64_C(0xc000b000a0009000) ));
8337#endif
8338
8339#ifdef VBOX_STRICT
8340 /*
8341 * Check that the fExec flags we've got make sense.
8342 */
8343 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8344#endif
8345
8346 /*
8347 * To keep things simple we have to commit any pending writes first as we
8348 * may end up making calls.
8349 */
8350 /** @todo we could postpone this till we make the call and reload the
8351 * registers after returning from the call. Not sure if that's sensible or
8352 * not, though. */
8353 off = iemNativeRegFlushPendingWrites(pReNative, off);
8354
8355 /*
8356 * First we calculate the new RSP and the effective stack pointer value.
8357 * For 64-bit mode and flat 32-bit these two are the same.
8358 * (Code structure is very similar to that of PUSH)
8359 */
8360 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
8361 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
8362 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
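    /* On Intel CPUs a segment-register push outside 16-bit mode only writes the low 16 bits
       of the stack slot; in 16-bit mode all 32 bits are written, with the upper half merged
       from EFLAGS in the TlbLookup path further down. */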
8363 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
8364 ? cbMem : sizeof(uint16_t);
8365 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
8366 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8367 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8368 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8369 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8370 if (cBitsFlat != 0)
8371 {
8372 Assert(idxRegEffSp == idxRegRsp);
8373 Assert(cBitsFlat == 32 || cBitsFlat == 64);
8374 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8375 if (cBitsFlat == 64)
8376 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
8377 else
8378 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
8379 }
8380 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8381 {
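        /* Segmented stack: the D/B bit of the SS attributes decides whether SP (16-bit) or
           ESP (32-bit) is used.  Test it here; the code for the less common stack width is
           emitted after the TLB-lookup jump below (see the Use16BitSp block). */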
8382 Assert(idxRegEffSp != idxRegRsp);
8383 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8384 kIemNativeGstRegUse_ReadOnly);
8385#ifdef RT_ARCH_AMD64
8386 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8387#else
8388 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8389#endif
8390 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8391 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8392 offFixupJumpToUseOtherBitSp = off;
8393 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8394 {
8395 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8396 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8397 }
8398 else
8399 {
8400 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8401 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8402 }
8403 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8404 }
8405 /* SpUpdateEnd: */
8406 uint32_t const offLabelSpUpdateEnd = off;
8407
8408 /*
8409 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8410 * we're skipping lookup).
8411 */
8412 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8413 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
8414 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8415 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8416 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8417 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8418 : UINT32_MAX;
8419 uint8_t const idxRegValue = !TlbState.fSkip
8420 && pVarValue->enmKind != kIemNativeVarKind_Immediate
8421 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
8422 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
8423 : UINT8_MAX;
8424 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
8425
8426
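    /*
     * Jump to the TLB lookup code, or straight to TlbMiss when the lookup is being skipped.
     */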
8427 if (!TlbState.fSkip)
8428 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8429 else
8430 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8431
8432 /*
8433 * Use16BitSp:
8434 */
8435 if (cBitsFlat == 0)
8436 {
8437#ifdef RT_ARCH_AMD64
8438 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8439#else
8440 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8441#endif
8442 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8443 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8444 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8445 else
8446 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8447 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8448 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8449 }
8450
8451 /*
8452 * TlbMiss:
8453 *
8454 * Call helper to do the pushing.
8455 */
8456 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8457
8458#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8459 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8460#else
8461 RT_NOREF(idxInstr);
8462#endif
8463
8464 /* Save variables in volatile registers. */
8465 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8466 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8467 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
8468 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
8469 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8470
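    /* Load the helper arguments, taking care not to overwrite a source register before it
       has been consumed (idxRegValue and idxRegEffSp may already live in ARG1/ARG2). */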
8471 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
8472 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
8473 {
8474 /* Swap them using ARG0 as temp register: */
8475 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
8476 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
8477 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
8478 }
8479 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
8480 {
8481 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
8482 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
8483 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8484
8485 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
8486 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8487 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8488 }
8489 else
8490 {
8491 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
8492 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8493
8494 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
8495 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
8496 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
8497 }
8498
8499 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8500 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8501
8502 /* Done setting up parameters, make the call. */
8503 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8504
8505 /* Restore variables and guest shadow registers to volatile registers. */
8506 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8507 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8508
8509#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8510 if (!TlbState.fSkip)
8511 {
8512 /* end of TlbMiss - Jump to the done label. */
8513 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8514 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8515
8516 /*
8517 * TlbLookup:
8518 */
8519 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
8520 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8521
8522 /*
8523 * Emit code to do the actual storing / fetching.
8524 */
8525 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
8526# ifdef IEM_WITH_TLB_STATISTICS
8527 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8528 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8529# endif
8530 if (idxRegValue != UINT8_MAX)
8531 {
8532 switch (cbMemAccess)
8533 {
8534 case 2:
8535 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8536 break;
8537 case 4:
8538 if (!fIsIntelSeg)
8539 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8540 else
8541 {
8542                    /* Intel real mode segment push: the 10890XE adds the 2nd half of EFLAGS to a
8543                       PUSH FS in real mode, so we have to try to emulate that here.
8544                       We borrow the now unused idxReg1 from the TLB lookup code for this. */
8545 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
8546 kIemNativeGstReg_EFlags);
8547 if (idxRegEfl != UINT8_MAX)
8548 {
8549#ifdef RT_ARCH_AMD64
8550 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
8551 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8552 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8553#else
8554 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
8555 off, TlbState.idxReg1, idxRegEfl,
8556 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8557#endif
8558 iemNativeRegFreeTmp(pReNative, idxRegEfl);
8559 }
8560 else
8561 {
8562 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
8563 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
8564 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8565 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8566 }
8567 /* ASSUMES the upper half of idxRegValue is ZERO. */
8568 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
8569 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
8570 }
8571 break;
8572 case 8:
8573 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8574 break;
8575 default:
8576 AssertFailed();
8577 }
8578 }
8579 else
8580 {
8581 switch (cbMemAccess)
8582 {
8583 case 2:
8584 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
8585 idxRegMemResult, TlbState.idxReg1);
8586 break;
8587 case 4:
8588 Assert(!fIsSegReg);
8589 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
8590 idxRegMemResult, TlbState.idxReg1);
8591 break;
8592 case 8:
8593 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
8594 break;
8595 default:
8596 AssertFailed();
8597 }
8598 }
8599
8600 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8601 TlbState.freeRegsAndReleaseVars(pReNative);
8602
8603 /*
8604 * TlbDone:
8605 *
8606 * Commit the new RSP value.
8607 */
8608 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8609 }
8610#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8611
8612#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8613 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
8614#endif
8615 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8616 if (idxRegEffSp != idxRegRsp)
8617 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8618
8619 /* The value variable is implicitly flushed. */
8620 if (idxRegValue != UINT8_MAX)
8621 iemNativeVarRegisterRelease(pReNative, idxVarValue);
8622 iemNativeVarFreeLocal(pReNative, idxVarValue);
8623
8624 return off;
8625}
8626
8627
8628
8629/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
8630#define IEM_MC_POP_GREG_U16(a_iGReg) \
8631 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
8632 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
8633#define IEM_MC_POP_GREG_U32(a_iGReg) \
8634 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
8635 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
8636#define IEM_MC_POP_GREG_U64(a_iGReg) \
8637 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
8638 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
8639
8640#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
8641 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
8642 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8643#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
8644 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
8645 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
8646
8647#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
8648 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
8649 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8650#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
8651 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
8652 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
8653
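/* A quick decode example for the packing above: RT_MAKE_U32_FROM_U8(16, 64, 0, 0)
   yields 0x00004010; iemNativeEmitStackPopGReg() below recovers the operand size as
   RT_BYTE1() / 8 = 2 bytes and the flat-mode width as RT_BYTE2() = 64. */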
8654
8655DECL_FORCE_INLINE_THROW(uint32_t)
8656iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
8657 uint8_t idxRegTmp)
8658{
8659 /* Use16BitSp: */
8660#ifdef RT_ARCH_AMD64
8661 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8662 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
8663 RT_NOREF(idxRegTmp);
8664#else
8665 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
8666 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
8667 /* add tmp, regrsp, #cbMem */
8668 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
8669 /* and tmp, tmp, #0xffff */
8670 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
8671 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
8672 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
8673 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
8674#endif
8675 return off;
8676}
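/* Worked example for the 16-bit case above (illustrative values): with RSP=0x0012fffe
   and cbMem=2, idxRegEffSp ends up as 0x0000fffe (the address the value is read from)
   while RSP becomes 0x00120000 - the add wraps within bits 15:0 and the upper bits of
   RSP are left untouched. */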
8677
8678
8679DECL_FORCE_INLINE(uint32_t)
8680iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
8681{
8682 /* Use32BitSp: */
8683 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8684 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
8685 return off;
8686}
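/* The 32-bit variant needs no explicit masking: the 32-bit register move and the
   32-bit add both zero-extend into bits 63:32 on the hosts we emit code for, which
   matches how the guest ESP is kept when a 32-bit stack is in use. */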
8687
8688
8689/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
8690DECL_INLINE_THROW(uint32_t)
8691iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
8692 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
8693{
8694 /*
8695 * Assert sanity.
8696 */
8697 Assert(idxGReg < 16);
8698#ifdef VBOX_STRICT
8699 if (RT_BYTE2(cBitsVarAndFlat) != 0)
8700 {
8701 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8702 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8703 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8704 Assert( pfnFunction
8705 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8706 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
8707 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8708 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
8709 : UINT64_C(0xc000b000a0009000) ));
8710 }
8711 else
8712 Assert( pfnFunction
8713 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
8714 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
8715 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
8716 : UINT64_C(0xc000b000a0009000) ));
8717#endif
8718
8719#ifdef VBOX_STRICT
8720 /*
8721 * Check that the fExec flags we've got make sense.
8722 */
8723 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8724#endif
8725
8726 /*
8727 * To keep things simple we have to commit any pending writes first as we
8728 * may end up making calls.
8729 */
8730 off = iemNativeRegFlushPendingWrites(pReNative, off);
8731
8732 /*
8733 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
8734 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
8735 * directly as the effective stack pointer.
8736 * (Code structure is very similar to that of PUSH)
8737 */
8738 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
8739 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
8740 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8741 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8742 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8743 /** @todo can do a better job picking the register here. For cbMem >= 4 this
8744 * will be the resulting register value. */
8745 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
8746
8747 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8748 if (cBitsFlat != 0)
8749 {
8750 Assert(idxRegEffSp == idxRegRsp);
8751 Assert(cBitsFlat == 32 || cBitsFlat == 64);
8752 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8753 }
8754 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8755 {
8756 Assert(idxRegEffSp != idxRegRsp);
8757 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8758 kIemNativeGstRegUse_ReadOnly);
8759#ifdef RT_ARCH_AMD64
8760 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8761#else
8762 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8763#endif
8764 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8765 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8766 offFixupJumpToUseOtherBitSp = off;
8767 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8768 {
8769/** @todo can skip idxRegRsp updating when popping ESP. */
8770 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8771 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8772 }
8773 else
8774 {
8775 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8776 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8777 }
8778 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8779 }
8780 /* SpUpdateEnd: */
8781 uint32_t const offLabelSpUpdateEnd = off;
8782
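    /*
     * Rough flow from here on (mirrors the PUSH emitter): the code below jumps to
     * TlbLookup (or straight to TlbMiss when the lookup is skipped); a TLB hit loads
     * the value inline and falls into TlbDone, while a miss lands in TlbMiss, calls
     * the helper and then jumps to TlbDone as well.
     */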
8783 /*
8784 * Okay, now prepare for the TLB lookup and jump to the TlbLookup code (or
8785 * straight to TlbMiss if we're skipping the lookup).
8786 */
8787 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8788 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
8789 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8790 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8791 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8792 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8793 : UINT32_MAX;
8794
8795 if (!TlbState.fSkip)
8796 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8797 else
8798 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8799
8800 /*
8801 * Use16BitSp / Use32BitSp (whichever the code path above did not handle):
8802 */
8803 if (cBitsFlat == 0)
8804 {
8805#ifdef RT_ARCH_AMD64
8806 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8807#else
8808 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8809#endif
8810 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8811 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8812 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8813 else
8814 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8815 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8816 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8817 }
8818
8819 /*
8820 * TlbMiss:
8821 *
8822 * Call helper to do the popping.
8823 */
8824 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8825
8826#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8827 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8828#else
8829 RT_NOREF(idxInstr);
8830#endif
8831
8832 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8833 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8834 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
8835 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8836
8837
8838 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
8839 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8840 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8841
8842 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8843 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8844
8845 /* Done setting up parameters, make the call. */
8846 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8847
8848 /* Move the return register content to idxRegMemResult. */
8849 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8850 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8851
8852 /* Restore variables and guest shadow registers to volatile registers. */
8853 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8854 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8855
8856#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8857 if (!TlbState.fSkip)
8858 {
8859 /* end of TlbMiss - Jump to the done label. */
8860 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8861 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8862
8863 /*
8864 * TlbLookup:
8865 */
8866 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
8867 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8868
8869 /*
8870 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
8871 */
8872 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8873# ifdef IEM_WITH_TLB_STATISTICS
8874 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8875 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8876# endif
8877 switch (cbMem)
8878 {
8879 case 2:
8880 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8881 break;
8882 case 4:
8883 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8884 break;
8885 case 8:
8886 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8887 break;
8888 default:
8889 AssertFailed();
8890 }
8891
8892 TlbState.freeRegsAndReleaseVars(pReNative);
8893
8894 /*
8895 * TlbDone:
8896 *
8897 * Set the new RSP value (FLAT accesses need to calculate it first) and
8898 * commit the popped register value.
8899 */
8900 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8901 }
8902#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8903
8904 if (idxGReg != X86_GREG_xSP)
8905 {
8906 /* Set the register. */
8907 if (cbMem >= sizeof(uint32_t))
8908 {
8909#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8910 AssertMsg( pReNative->idxCurCall == 0
8911 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8912 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8913 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8914#endif
8915 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8916#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8917 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8918#endif
8919#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8920 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8921 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8922#endif
8923 }
8924 else
8925 {
8926 Assert(cbMem == sizeof(uint16_t));
8927 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8928 kIemNativeGstRegUse_ForUpdate);
8929 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8930#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8931 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8932#endif
8933 iemNativeRegFreeTmp(pReNative, idxRegDst);
8934 }
8935
8936 /* Complete RSP calculation for FLAT mode. */
8937 if (idxRegEffSp == idxRegRsp)
8938 {
8939 if (cBitsFlat == 64)
8940 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8941 else
8942 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8943 }
8944 }
8945 else
8946 {
8947 /* We're popping RSP, ESP or SP. Only the latter needs a bit of extra work, of course. */
8948 if (cbMem == sizeof(uint64_t))
8949 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8950 else if (cbMem == sizeof(uint32_t))
8951 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8952 else
8953 {
8954 if (idxRegEffSp == idxRegRsp)
8955 {
8956 if (cBitsFlat == 64)
8957 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8958 else
8959 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8960 }
8961 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8962 }
8963 }
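    /* Note: architecturally the stack pointer is incremented before the popped value
       is written to the destination, so for POP RSP/ESP the popped value simply
       becomes the new stack pointer, and only the 16-bit POP SP case has to merge
       the popped word into the (incremented) RSP above. */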
8964
8965#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8966 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
8967#endif
8968
8969 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8970 if (idxRegEffSp != idxRegRsp)
8971 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8972 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8973
8974 return off;
8975}
8976
8977
8978
8979/*********************************************************************************************************************************
8980* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8981*********************************************************************************************************************************/
8982
8983#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8984 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8985 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8986 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8987
8988#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8989 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8990 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8991 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8992
8993#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8994 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8995 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8996 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8997
8998#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8999 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
9000 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
9001 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
9002
9003
9004#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9005 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
9006 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9007 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
9008
9009#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9010 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
9011 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9012 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
9013
9014#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9015 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
9016 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9017 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
9018
9019#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9020 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
9021 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9022 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
9023
9024#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9025 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
9026 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9027 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
9028
9029
9030#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9031 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
9032 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9033 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
9034
9035#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9036 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
9037 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9038 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
9039
9040#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9041 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
9042 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9043 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
9044
9045#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9046 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
9047 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9048 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
9049
9050#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9051 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
9052 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9053 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
9054
9055
9056#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9057 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
9058 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9059 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
9060
9061#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9062 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
9063 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9064 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
9065#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9066 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
9067 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9068 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
9069
9070#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9071 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
9072 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9073 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
9074
9075#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9076 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
9077 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9078 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
9079
9080
9081#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9082 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
9083 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9084 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
9085
9086#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9087 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
9088 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
9089 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
9090
9091
9092#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9093 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9094 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9095 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
9096
9097#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9098 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9099 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9100 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
9101
9102#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9103 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9104 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9105 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
9106
9107#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9108 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9109 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9110 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
9111
9112
9113
9114#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9115 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9116 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
9117 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
9118
9119#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9120 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9121 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
9122 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
9123
9124#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9125 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9126 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
9127 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
9128
9129#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9130 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9131 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
9132 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
9133
9134
9135#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9136 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9137 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9138 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
9139
9140#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9141 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9142 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9143 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
9144
9145#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9146 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9147 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9148 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9149
9150#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9151 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9152 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9153 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
9154
9155#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
9156 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
9157 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9158 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9159
9160
9161#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9162 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9163 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9164 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
9165
9166#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9167 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9168 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9169 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
9170
9171#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9172 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9173 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9174 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9175
9176#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9177 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9178 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9179 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
9180
9181#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
9182 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
9183 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9184 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9185
9186
9187#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9188 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9189 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9190 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
9191
9192#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9193 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9194 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9195 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
9196
9197#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9198 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9199 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9200 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9201
9202#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9203 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9204 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9205 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
9206
9207#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
9208 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
9209 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9210 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9211
9212
9213#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
9214 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
9215 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9216 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
9217
9218#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
9219 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
9220 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
9221 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
9222
9223
9224#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9225 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9226 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9227 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
9228
9229#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9230 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9231 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9232 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
9233
9234#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9235 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9236 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9237 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
9238
9239#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9240 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9241 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9242 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
9243
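/* Note on fAlignMaskAndCtl as used above: for these macros it is simply the natural
   alignment mask, e.g. sizeof(uint32_t) - 1 = 3 means the two low bits of the
   effective address must be clear to stay on the fast TLB path; an access that trips
   the mask is routed to the slower miss/alignment handling instead. The upper bits
   of the value can carry extra alignment control flags, but they are not used here. */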
9244
9245DECL_INLINE_THROW(uint32_t)
9246iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
9247 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint32_t fAlignMaskAndCtl,
9248 uintptr_t pfnFunction, uint8_t idxInstr)
9249{
9250 /*
9251 * Assert sanity.
9252 */
9253 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
9254 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
9255 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
9256 && pVarMem->cbVar == sizeof(void *),
9257 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9258
9259 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9260 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9261 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
9262 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
9263 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9264
9265 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
9266 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
9267 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
9268 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
9269 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9270
9271 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
9272
9273 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
9274
9275#ifdef VBOX_STRICT
9276# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
9277 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
9278 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
9279 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
9280 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
9281# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
9282 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
9283 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
9284 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
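/* Example: IEM_MAP_HLP_FN(IEM_ACCESS_DATA_RW, iemNativeHlpMemFlatMapDataU32) resolves
   to iemNativeHlpMemFlatMapDataU32Rw - read+write but not atomic - whereas the
   _ATOMIC macro variants match the first branch and pick the Atomic helper. */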
9285
9286 if (iSegReg == UINT8_MAX)
9287 {
9288 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
9289 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
9290 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
9291 switch (cbMem)
9292 {
9293 case 1:
9294 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8));
9295 Assert(!fAlignMaskAndCtl);
9296 break;
9297 case 2:
9298 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16));
9299 Assert(fAlignMaskAndCtl < 2);
9300 break;
9301 case 4:
9302 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32));
9303 Assert(fAlignMaskAndCtl < 4);
9304 break;
9305 case 8:
9306 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64));
9307 Assert(fAlignMaskAndCtl < 8);
9308 break;
9309 case 10:
9310 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
9311 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
9312 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9313 Assert(fAlignMaskAndCtl < 8);
9314 break;
9315 case 16:
9316 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128));
9317 Assert(fAlignMaskAndCtl < 16);
9318 break;
9319# if 0
9320 case 32:
9321 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256));
9322 Assert(fAlignMaskAndCtl < 32);
9323 break;
9324 case 64:
9325 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512));
9326 Assert(fAlignMaskAndCtl < 64);
9327 break;
9328# endif
9329 default: AssertFailed(); break;
9330 }
9331 }
9332 else
9333 {
9334 Assert(iSegReg < 6);
9335 switch (cbMem)
9336 {
9337 case 1:
9338 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8));
9339 Assert(!fAlignMaskAndCtl);
9340 break;
9341 case 2:
9342 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16));
9343 Assert(fAlignMaskAndCtl < 2);
9344 break;
9345 case 4:
9346 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32));
9347 Assert(fAlignMaskAndCtl < 4);
9348 break;
9349 case 8:
9350 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64));
9351 Assert(fAlignMaskAndCtl < 8);
9352 break;
9353 case 10:
9354 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
9355 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
9356 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9357 Assert(fAlignMaskAndCtl < 8);
9358 break;
9359 case 16:
9360 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128));
9361 Assert(fAlignMaskAndCtl < 16);
9362 break;
9363# if 0
9364 case 32:
9365 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256));
9366 Assert(fAlignMaskAndCtl < 32);
9367 break;
9368 case 64:
9369 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512));
9370 Assert(fAlignMaskAndCtl < 64);
9371 break;
9372# endif
9373 default: AssertFailed(); break;
9374 }
9375 }
9376# undef IEM_MAP_HLP_FN
9377# undef IEM_MAP_HLP_FN_NO_AT
9378#endif
9379
9380#ifdef VBOX_STRICT
9381 /*
9382 * Check that the fExec flags we've got make sense.
9383 */
9384 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
9385#endif
9386
9387 /*
9388 * To keep things simple we have to commit any pending writes first as we
9389 * may end up making calls.
9390 */
9391 off = iemNativeRegFlushPendingWrites(pReNative, off);
9392
9393#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9394 /*
9395 * Move/spill/flush stuff out of call-volatile registers.
9396 * This is the easy way out. We could contain this to the tlb-miss branch
9397 * by saving and restoring active stuff here.
9398 */
9399 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
9400 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9401#endif
9402
9403 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
9404 while the tlb-miss codepath will temporarily put it on the stack.
9405 Set the type to stack here so we don't need to do it twice below. */
9406 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
9407 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
9408 /** @todo use a tmp register from TlbState, since they'll be free after tlb
9409 * lookup is done. */
9410
9411 /*
9412 * Define labels and allocate the result register (trying for the return
9413 * register if we can).
9414 */
9415 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
9416 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
9417 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
9418 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
9419 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
9420 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
9421 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
9422 : UINT32_MAX;
9423
9424 /*
9425 * Jump to the TLB lookup code.
9426 */
9427 if (!TlbState.fSkip)
9428 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
9429
9430 /*
9431 * TlbMiss:
9432 *
9433 * Call helper to do the mapping.
9434 * We flush all guest register shadow copies here.
9435 */
9436 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
9437
9438#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
9439 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9440#else
9441 RT_NOREF(idxInstr);
9442#endif
9443
9444#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9445 /* Save variables in volatile registers. */
9446 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
9447 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
9448#endif
9449
9450 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
9451 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*offAddend*/,
9452#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9453 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
9454#else
9455 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9456#endif
9457
9458 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
9459 if (iSegReg != UINT8_MAX)
9460 {
9461 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
9462 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
9463 }
9464
9465 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
9466 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
9467 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
9468
9469 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9470 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9471
9472 /* Done setting up parameters, make the call. */
9473 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
9474
9475 /*
9476 * Put the output in the right registers.
9477 */
9478 Assert(idxRegMemResult == pVarMem->idxReg);
9479 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
9480 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
9481
9482#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9483 /* Restore variables and guest shadow registers to volatile registers. */
9484 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
9485 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
9486#endif
9487
9488 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
9489 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
9490
9491#ifdef IEMNATIVE_WITH_TLB_LOOKUP
9492 if (!TlbState.fSkip)
9493 {
9494 /* end of TlbMiss - Jump to the done label. */
9495 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
9496 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
9497
9498 /*
9499 * TlbLookup:
9500 */
9501 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl, fAccess,
9502 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
9503# ifdef IEM_WITH_TLB_STATISTICS
9504 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
9505 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
9506# endif
9507
9508 /* [idxVarUnmapInfo] = 0; */
9509 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
9510
9511 /*
9512 * TlbDone:
9513 */
9514 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
9515
9516 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
9517
9518# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9519 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
9520 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9521# endif
9522 }
9523#else
9524 RT_NOREF(fAccess, fAlignMaskAndCtl, idxLabelTlbMiss);
9525#endif
9526
9527 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9528 iemNativeVarRegisterRelease(pReNative, idxVarMem);
9529
9530 return off;
9531}
9532
9533
9534#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
9535 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
9536 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
9537
9538#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
9539 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
9540 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
9541
9542#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
9543 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
9544 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
9545
9546#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
9547 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
9548 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
9549
9550DECL_INLINE_THROW(uint32_t)
9551iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
9552 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
9553{
9554 /*
9555 * Assert sanity.
9556 */
9557 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9558#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
9559 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9560#endif
9561 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
9562 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
9563 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
9564#ifdef VBOX_STRICT
9565 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
9566 {
9567 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
9568 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
9569 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
9570 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
9571 case IEM_ACCESS_TYPE_WRITE:
9572 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
9573 case IEM_ACCESS_TYPE_READ:
9574 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
9575 default: AssertFailed();
9576 }
9577#else
9578 RT_NOREF(fAccess);
9579#endif
9580
9581 /*
9582 * To keep things simple we have to commit any pending writes first as we
9583 * may end up making calls (there shouldn't be any at this point, so this
9584 * is just for consistency).
9585 */
9586 /** @todo we could postpone this till we make the call and reload the
9587 * registers after returning from the call. Not sure if that's sensible or
9588 * not, though. */
9589 off = iemNativeRegFlushPendingWrites(pReNative, off);
9590
9591 /*
9592 * Move/spill/flush stuff out of call-volatile registers.
9593 *
9594 * We exclude any register holding the bUnmapInfo variable, as we'll be
9595 * checking it after returning from the call and will free it afterwards.
9596 */
9597 /** @todo save+restore active registers and maybe guest shadows in miss
9598 * scenario. */
9599 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
9600 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
9601
9602 /*
9603 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
9604 * to call the unmap helper function.
9605 *
9606 * The likelihood of it being zero is higher than for the TLB hit when doing
9607 * the mapping, as a TLB miss for a well aligned and unproblematic memory
9608 * access should also end up with a mapping that won't need special unmapping.
9609 */
9610 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
9611 * should speed up things for the pure interpreter as well when TLBs
9612 * are enabled. */
9613#ifdef RT_ARCH_AMD64
9614 if (pVarUnmapInfo->idxReg == UINT8_MAX)
9615 {
9616 /* test byte [rbp - xxx], 0ffh */
9617 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9618 pbCodeBuf[off++] = 0xf6;
9619 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
9620 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9621 pbCodeBuf[off++] = 0xff;
9622 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9623 }
9624 else
9625#endif
9626 {
9627 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
9628 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
9629 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
9630 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9631 }
9632 uint32_t const offJmpFixup = off;
9633 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
9634
9635 /*
9636 * Call the unmap helper function.
9637 */
9638#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
9639 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9640#else
9641 RT_NOREF(idxInstr);
9642#endif
9643
9644 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
9645 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
9646 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9647
9648 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9649 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9650
9651 /* Done setting up parameters, make the call. */
9652 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
9653
9654 /* The bUnmapInfo variable is implicitly freed by these MCs. */
9655 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
9656
9657 /*
9658 * Done, just fixup the jump for the non-call case.
9659 */
9660 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
9661
9662 return off;
9663}
9664
9665
9666
9667/*********************************************************************************************************************************
9668* State and Exceptions *
9669*********************************************************************************************************************************/
9670
9671#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9672#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9673
9674#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9675#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9676#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9677
9678#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9679#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9680#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9681
9682
9683DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
9684{
9685#ifndef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
9686 RT_NOREF(pReNative, fForChange);
9687#else
9688 if ( !(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED)
9689 && fForChange)
9690 {
9691# ifdef RT_ARCH_AMD64
9692
9693 /* Need to save the host MXCSR the first time, and clear the exception flags. */
9694 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9695 {
9696 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9697
9698 /* stmxcsr */
9699 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9700 pbCodeBuf[off++] = X86_OP_REX_B;
9701 pbCodeBuf[off++] = 0x0f;
9702 pbCodeBuf[off++] = 0xae;
9703 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9704 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9705 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9706 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9707 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9708 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9709
9710 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9711 }
9712
9713 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
9714 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
9715
9716 /*
9717 * Mask all exceptions, clear the exception status and load the result into MXCSR,
9718 * taking a detour through memory here because ldmxcsr/stmxcsr don't support
9719 * a register source/target (sigh).
9720 */
9721 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr);
9722 off = iemNativeEmitOrGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, X86_MXCSR_XCPT_MASK);
9723 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, ~X86_MXCSR_XCPT_FLAGS);
9724 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9725
9726 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9727
9728 /* ldmxcsr */
9729 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9730 pbCodeBuf[off++] = X86_OP_REX_B;
9731 pbCodeBuf[off++] = 0x0f;
9732 pbCodeBuf[off++] = 0xae;
9733 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9734 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9735 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9736 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9737 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9738 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9739
9740 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9741 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9742
9743# elif defined(RT_ARCH_ARM64)
9744 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
9745
9746 /* Need to save the host floating point control register (FPCR) the first time, and clear FPSR. */
9747 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9748 {
9749 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9750 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);
9751 pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9752 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9753 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9754 }
9755
9756 /*
9757 * Translate MXCSR to FPCR.
9758 *
9759 * Unfortunately we can't emulate the exact behavior of MXCSR as we can't take
9760 * FEAT_AFP on arm64 for granted (my M2 MacBook doesn't have it). So we can't map
9761 * MXCSR.DAZ to FPCR.FIZ and MXCSR.FZ to FPCR.FZ with FPCR.AH being set.
9762 * We can only use FPCR.FZ, which flushes both input and output denormals to zero. (A plain C sketch of this translation follows the function.)
9763 */
9764 /** @todo Check the host supported flags (needs additional work to get the host features from CPUM)
9765 * and implement alternate handling if FEAT_AFP is present. */
9766 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
9767
9768 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
9769
9770 /* First make sure that there is nothing set for the upper 16-bits (X86_MXCSR_MM, which we don't emulate right now). */
9771 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegTmp, idxRegMxCsr);
9772
9773 /* If either MXCSR.FZ or MXCSR.DAZ is set FPCR.FZ will be set. */
9774 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(IEMNATIVE_REG_FIXED_TMP0, idxRegTmp, X86_MXCSR_DAZ_BIT, 1);
9775 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, X86_MXCSR_FZ_BIT);
9776 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(idxRegTmp, idxRegTmp, IEMNATIVE_REG_FIXED_TMP0);
9777 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegTmp, idxRegTmp, ARMV8_FPCR_FZ_BIT);
9778
9779 /*
9780 * Init the rounding mode; the layout differs between MXCSR.RC[14:13] and FPCR.RMode[23:22]:
9781 *
9782 * Value MXCSR FPCR
9783 * 0 RN RN
9784 * 1 R- R+
9785 * 2 R+ R-
9786 * 3 RZ RZ
9787 *
9788 * Conversion can be achieved by swapping the two bit positions.
9789 */
9790 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT);
9791 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 14, 1);
9792 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT + 1);
9793 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 13, 1);
9794
9795 /* Write the value to FPCR. */
9796 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9797
9798 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9799 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9800 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9801# else
9802# error "Port me"
9803# endif
9804 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
9805 }
9806#endif
9807 return off;
9808}
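/*
 * Illustrative sketch only (not code used by the recompiler; the two helper
 * names below are made up for the example): a plain C equivalent of what the
 * instructions emitted above compute, relying only on the architectural bit
 * layouts (MXCSR.DAZ = bit 6, MXCSR.RC = bits 14:13, MXCSR.FZ = bit 15,
 * FPCR.RMode = bits 23:22, FPCR.FZ = bit 24).
 */
#if 0
/* AMD64 path: the guest MXCSR value that gets loaded into the host MXCSR. */
static uint32_t iemNativeSketchGuestMxCsrForHost(uint32_t fGstMxCsr)
{
    /* Mask all SIMD exceptions and clear any pending exception flags. */
    return (fGstMxCsr | X86_MXCSR_XCPT_MASK) & ~X86_MXCSR_XCPT_FLAGS;
}

/* ARM64 path: the FPCR value derived from the guest MXCSR. */
static uint32_t iemNativeSketchMxCsrToFpcr(uint32_t fGstMxCsr)
{
    /* FPCR.FZ is set when either MXCSR.FZ or MXCSR.DAZ is set (no FEAT_AFP assumed). */
    uint32_t fFpcr = (fGstMxCsr & (RT_BIT_32(15) | RT_BIT_32(6))) ? RT_BIT_32(24) : 0;

    /* Rounding mode: values 0 (RN) and 3 (RZ) are identical, while 1 (round down) and
       2 (round up) are swapped between MXCSR.RC and FPCR.RMode, so swap the two bits. */
    uint32_t const fRc = (fGstMxCsr >> 13) & 3;
    fFpcr |= (((fRc & 1) << 1) | (fRc >> 1)) << 22;
    return fFpcr;
}
#endif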
9809
9810
9811
9812/*********************************************************************************************************************************
9813* Emitters for FPU related operations. *
9814*********************************************************************************************************************************/
9815
9816#define IEM_MC_FETCH_FCW(a_u16Fcw) \
9817 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
9818
9819/** Emits code for IEM_MC_FETCH_FCW. */
9820DECL_INLINE_THROW(uint32_t)
9821iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9822{
9823 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9824 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9825
9826 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9827
9828 /* Allocate a temporary FCW register. */
9829 /** @todo eliminate extra register */
9830 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
9831 kIemNativeGstRegUse_ReadOnly);
9832
9833 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
9834
9835 /* Free but don't flush the FCW register. */
9836 iemNativeRegFreeTmp(pReNative, idxFcwReg);
9837 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9838
9839 return off;
9840}
9841
9842
9843#define IEM_MC_FETCH_FSW(a_u16Fsw) \
9844 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
9845
9846/** Emits code for IEM_MC_FETCH_FSW. */
9847DECL_INLINE_THROW(uint32_t)
9848iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9849{
9850 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9851 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9852
9853 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
9854 /* Allocate a temporary FSW register. */
9855 /** @todo eliminate extra register */
9856 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
9857 kIemNativeGstRegUse_ReadOnly);
9858
9859 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
9860
9861 /* Free but don't flush the FSW register. */
9862 iemNativeRegFreeTmp(pReNative, idxFswReg);
9863 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9864
9865 return off;
9866}
9867
9868
9869
9870#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9871
9872
9873/*********************************************************************************************************************************
9874* Emitters for SSE/AVX specific operations. *
9875*********************************************************************************************************************************/
9876
9877#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
9878 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
9879
9880/** Emits code for IEM_MC_COPY_XREG_U128. */
9881DECL_INLINE_THROW(uint32_t)
9882iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
9883{
9884 /* This is a nop if the source and destination register are the same. */
9885 if (iXRegDst != iXRegSrc)
9886 {
9887 /* Allocate destination and source register. */
9888 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
9889 kIemNativeGstSimdRegLdStSz_Low128,
9890 kIemNativeGstRegUse_ForFullWrite);
9891 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
9892 kIemNativeGstSimdRegLdStSz_Low128,
9893 kIemNativeGstRegUse_ReadOnly);
9894
9895 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9896
9897 /* Free but don't flush the source and destination register. */
9898 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9899 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9900 }
9901
9902 return off;
9903}
9904
9905
9906#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
9907 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
9908
9909/** Emits code for IEM_MC_FETCH_XREG_U128. */
9910DECL_INLINE_THROW(uint32_t)
9911iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
9912{
9913 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9914 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9915
9916 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9917 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9918
9919 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9920
9921 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9922
9923 /* Free but don't flush the source register. */
9924 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9925 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9926
9927 return off;
9928}
9929
9930
9931#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
9932 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
9933
9934#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
9935 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
9936
9937/** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
9938DECL_INLINE_THROW(uint32_t)
9939iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
9940{
9941 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9942 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9943
9944 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9945 kIemNativeGstSimdRegLdStSz_Low128,
9946 kIemNativeGstRegUse_ReadOnly);
9947
9948 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9949 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9950
9951 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9952
9953 /* Free but don't flush the source register. */
9954 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9955 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9956
9957 return off;
9958}
9959
9960
9961#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
9962 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
9963
9964#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
9965 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
9966
9967/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
9968DECL_INLINE_THROW(uint32_t)
9969iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
9970{
9971 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9972 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9973
9974 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9975 kIemNativeGstSimdRegLdStSz_Low128,
9976 kIemNativeGstRegUse_ReadOnly);
9977
9978 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9979 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9980
9981 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9982
9983 /* Free but don't flush the source register. */
9984 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9985 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9986
9987 return off;
9988}
9989
9990
9991#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
9992 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
9993
9994/** Emits code for IEM_MC_FETCH_XREG_U16. */
9995DECL_INLINE_THROW(uint32_t)
9996iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
9997{
9998 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9999 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
10000
10001 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10002 kIemNativeGstSimdRegLdStSz_Low128,
10003 kIemNativeGstRegUse_ReadOnly);
10004
10005 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10006 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10007
10008 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
10009
10010 /* Free but don't flush the source register. */
10011 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10012 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10013
10014 return off;
10015}
10016
10017
10018#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
10019 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
10020
10021/** Emits code for IEM_MC_FETCH_XREG_U8. */
10022DECL_INLINE_THROW(uint32_t)
10023iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
10024{
10025 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10026 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
10027
10028 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10029 kIemNativeGstSimdRegLdStSz_Low128,
10030 kIemNativeGstRegUse_ReadOnly);
10031
10032 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10033 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10034
10035 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
10036
10037 /* Free but don't flush the source register. */
10038 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10039 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10040
10041 return off;
10042}
10043
10044
10045#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
10046 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
10047
10048AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
10049#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
10050 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
10051
10052
10053/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
10054DECL_INLINE_THROW(uint32_t)
10055iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10056{
10057 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10058 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10059
10060 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10061 kIemNativeGstSimdRegLdStSz_Low128,
10062 kIemNativeGstRegUse_ForFullWrite);
10063 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10064
10065 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10066
10067 /* Free but don't flush the destination register. */
10068 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10069 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10070
10071 return off;
10072}
10073
10074
10075#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
10076 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
10077
10078#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
10079 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
10080
10081#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
10082 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
10083
10084#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
10085 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
10086
10087#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
10088 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
10089
10090#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
10091 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
10092
10093/** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8 and the IEM_MC_STORE_XREG_R32/IEM_MC_STORE_XREG_R64 variants. */
10094DECL_INLINE_THROW(uint32_t)
10095iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
10096 uint8_t cbLocal, uint8_t iElem)
10097{
10098 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10099 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
10100
10101#ifdef VBOX_STRICT
10102 switch (cbLocal)
10103 {
10104 case sizeof(uint64_t): Assert(iElem < 2); break;
10105 case sizeof(uint32_t): Assert(iElem < 4); break;
10106 case sizeof(uint16_t): Assert(iElem < 8); break;
10107 case sizeof(uint8_t): Assert(iElem < 16); break;
10108 default: AssertFailed();
10109 }
10110#endif
10111
10112 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10113 kIemNativeGstSimdRegLdStSz_Low128,
10114 kIemNativeGstRegUse_ForUpdate);
10115 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10116
10117 switch (cbLocal)
10118 {
10119 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10120 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10121 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10122 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10123 default: AssertFailed();
10124 }
10125
10126 /* Free but don't flush the destination register. */
10127 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10128 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10129
10130 return off;
10131}
10132
10133
10134#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
10135 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
10136
10137/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
10138DECL_INLINE_THROW(uint32_t)
10139iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10140{
10141 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10142 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10143
10144 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10145 kIemNativeGstSimdRegLdStSz_Low128,
10146 kIemNativeGstRegUse_ForUpdate);
10147 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10148
10149 /* Zero the vector register first, then store the 64-bit value to the lower 64 bits. */
10150 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10151 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10152
10153 /* Free but don't flush the destination register. */
10154 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10155 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10156
10157 return off;
10158}
10159
10160
10161#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
10162 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
10163
10164/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
10165DECL_INLINE_THROW(uint32_t)
10166iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10167{
10168 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10169 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10170
10171 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10172 kIemNativeGstSimdRegLdStSz_Low128,
10173 kIemNativeGstRegUse_ForUpdate);
10174 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10175
10176 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
10177 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10178 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10179
10180 /* Free but don't flush the destination register. */
10181 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10182 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10183
10184 return off;
10185}
10186
10187
10188#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
10189 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
10190
10191/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
10192DECL_INLINE_THROW(uint32_t)
10193iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
10194 uint8_t idxSrcVar, uint8_t iDwSrc)
10195{
10196 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10197 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10198
10199 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10200 kIemNativeGstSimdRegLdStSz_Low128,
10201 kIemNativeGstRegUse_ForUpdate);
10202 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10203
10204 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
10205 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
10206
10207 /* Free but don't flush the destination register. */
10208 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10209 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10210
10211 return off;
10212}
10213
10214
10215#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10216 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10217
10218/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
10219DECL_INLINE_THROW(uint32_t)
10220iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10221{
10222 /*
10223 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10224 * if iYRegDst gets allocated first for the full write it won't load the
10225 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it would get
10226 * duplicated from the already allocated host register for iYRegDst, which
10227 * contains garbage. This would be caught by the guest register value checking
10228 * in debug builds. (An illustrative sketch of this ordering problem follows the function.)
10229 */
10230 if (iYRegDst != iYRegSrc)
10231 {
10232 /* Allocate destination and source register. */
10233 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10234 kIemNativeGstSimdRegLdStSz_256,
10235 kIemNativeGstRegUse_ForFullWrite);
10236 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10237 kIemNativeGstSimdRegLdStSz_Low128,
10238 kIemNativeGstRegUse_ReadOnly);
10239
10240 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10241 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10242
10243 /* Free but don't flush the source and destination register. */
10244 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10245 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10246 }
10247 else
10248 {
10249 /* This effectively only clears the upper 128-bits of the register. */
10250 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10251 kIemNativeGstSimdRegLdStSz_High128,
10252 kIemNativeGstRegUse_ForFullWrite);
10253
10254 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10255
10256 /* Free but don't flush the destination register. */
10257 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10258 }
10259
10260 return off;
10261}
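/*
 * Illustrative sketch of the allocation-ordering problem described above (shown
 * for readability only; this is what the function deliberately avoids for the
 * iYRegDst == iYRegSrc case, not code that is compiled):
 */
#if 0
    /* Naive variant - broken when iYRegDst == iYRegSrc: the full-write allocation
       skips loading the guest value, and the subsequent read-only allocation of
       the same guest register simply reuses that uninitialized host register. */
    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                          kIemNativeGstSimdRegLdStSz_256,
                                                                          kIemNativeGstRegUse_ForFullWrite);
    uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
                                                                          kIemNativeGstSimdRegLdStSz_Low128,
                                                                          kIemNativeGstRegUse_ReadOnly);
#endif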
10262
10263
10264#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10265 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10266
10267/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
10268DECL_INLINE_THROW(uint32_t)
10269iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10270{
10271 /*
10272 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10273 * if iYRegDst gets allocated first for the full write it won't load the
10274 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it would get
10275 * duplicated from the already allocated host register for iYRegDst, which
10276 * contains garbage. This would be caught by the guest register value checking
10277 * in debug builds. Besides, iYRegSrc == iYRegDst would effectively only clear the
10278 * bits above 256 of a ZMM register, which we don't support yet, so this is just a nop.
10279 */
10280 if (iYRegDst != iYRegSrc)
10281 {
10282 /* Allocate destination and source register. */
10283 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10284 kIemNativeGstSimdRegLdStSz_256,
10285 kIemNativeGstRegUse_ReadOnly);
10286 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10287 kIemNativeGstSimdRegLdStSz_256,
10288 kIemNativeGstRegUse_ForFullWrite);
10289
10290 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10291
10292 /* Free but don't flush the source and destination register. */
10293 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10294 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10295 }
10296
10297 return off;
10298}
10299
10300
10301#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
10302 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
10303
10304/** Emits code for IEM_MC_FETCH_YREG_U128. */
10305DECL_INLINE_THROW(uint32_t)
10306iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
10307{
10308 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10309 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
10310
10311 Assert(iDQWord <= 1);
10312 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10313 iDQWord == 1
10314 ? kIemNativeGstSimdRegLdStSz_High128
10315 : kIemNativeGstSimdRegLdStSz_Low128,
10316 kIemNativeGstRegUse_ReadOnly);
10317
10318 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10319 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10320
10321 if (iDQWord == 1)
10322 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10323 else
10324 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10325
10326 /* Free but don't flush the source register. */
10327 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10328 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10329
10330 return off;
10331}
10332
10333
10334#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
10335 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
10336
10337/** Emits code for IEM_MC_FETCH_YREG_U64. */
10338DECL_INLINE_THROW(uint32_t)
10339iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
10340{
10341 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10342 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10343
10344 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10345 iQWord >= 2
10346 ? kIemNativeGstSimdRegLdStSz_High128
10347 : kIemNativeGstSimdRegLdStSz_Low128,
10348 kIemNativeGstRegUse_ReadOnly);
10349
10350 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10351 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10352
10353 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
10354
10355 /* Free but don't flush the source register. */
10356 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10357 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10358
10359 return off;
10360}
10361
10362
10363#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
10364 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
10365
10366/** Emits code for IEM_MC_FETCH_YREG_U32. */
10367DECL_INLINE_THROW(uint32_t)
10368iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
10369{
10370 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10371 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10372
10373 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10374 iDWord >= 4
10375 ? kIemNativeGstSimdRegLdStSz_High128
10376 : kIemNativeGstSimdRegLdStSz_Low128,
10377 kIemNativeGstRegUse_ReadOnly);
10378
10379 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10380 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10381
10382 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
10383
10384 /* Free but don't flush the source register. */
10385 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10386 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10387
10388 return off;
10389}
10390
10391
10392#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
10393 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
10394
10395/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
10396DECL_INLINE_THROW(uint32_t)
10397iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10398{
10399 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10400 kIemNativeGstSimdRegLdStSz_High128,
10401 kIemNativeGstRegUse_ForFullWrite);
10402
10403 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10404
10405 /* Free but don't flush the register. */
10406 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10407
10408 return off;
10409}
10410
10411
10412#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
10413 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
10414
10415/** Emits code for IEM_MC_STORE_YREG_U128. */
10416DECL_INLINE_THROW(uint32_t)
10417iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
10418{
10419 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10420 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10421
10422 Assert(iDQword <= 1);
10423 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10424 iDQword == 0
10425 ? kIemNativeGstSimdRegLdStSz_Low128
10426 : kIemNativeGstSimdRegLdStSz_High128,
10427 kIemNativeGstRegUse_ForFullWrite);
10428
10429 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10430
10431 if (iDQword == 0)
10432 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10433 else
10434 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
10435
10436 /* Free but don't flush the destination register. */
10437 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10438 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10439
10440 return off;
10441}
10442
10443
10444#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10445 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10446
10447/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
10448DECL_INLINE_THROW(uint32_t)
10449iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10450{
10451 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10452 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10453
10454 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10455 kIemNativeGstSimdRegLdStSz_256,
10456 kIemNativeGstRegUse_ForFullWrite);
10457
10458 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10459
10460 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10461 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10462
10463 /* Free but don't flush the destination register. */
10464 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10465 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10466
10467 return off;
10468}
10469
10470
10471#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
10472 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
10473
10474/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
10475DECL_INLINE_THROW(uint32_t)
10476iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10477{
10478 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10479 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10480
10481 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10482 kIemNativeGstSimdRegLdStSz_256,
10483 kIemNativeGstRegUse_ForFullWrite);
10484
10485 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10486
10487 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10488 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10489
10490 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10491 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10492
10493 return off;
10494}
10495
10496
10497#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
10498 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
10499
10500/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
10501DECL_INLINE_THROW(uint32_t)
10502iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10503{
10504 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10505 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10506
10507 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10508 kIemNativeGstSimdRegLdStSz_256,
10509 kIemNativeGstRegUse_ForFullWrite);
10510
10511 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10512
10513 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10514 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10515
10516 /* Free but don't flush the destination register. */
10517 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10518 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10519
10520 return off;
10521}
10522
10523
10524#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
10525 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
10526
10527/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
10528DECL_INLINE_THROW(uint32_t)
10529iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10530{
10531 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10532 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10533
10534 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10535 kIemNativeGstSimdRegLdStSz_256,
10536 kIemNativeGstRegUse_ForFullWrite);
10537
10538 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10539
10540 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10541 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10542
10543 /* Free but don't flush the destination register. */
10544 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10545 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10546
10547 return off;
10548}
10549
10550
10551#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
10552 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
10553
10554/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
10555DECL_INLINE_THROW(uint32_t)
10556iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10557{
10558 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10559 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10560
10561 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10562 kIemNativeGstSimdRegLdStSz_256,
10563 kIemNativeGstRegUse_ForFullWrite);
10564
10565 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10566
10567 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10568 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10569
10570 /* Free but don't flush the destination register. */
10571 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10572 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10573
10574 return off;
10575}
10576
10577
10578#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
10579 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
10580
10581/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
10582DECL_INLINE_THROW(uint32_t)
10583iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10584{
10585 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10586 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10587
10588 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10589 kIemNativeGstSimdRegLdStSz_256,
10590 kIemNativeGstRegUse_ForFullWrite);
10591
10592 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10593
10594 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10595
10596 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10597 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10598
10599 return off;
10600}
10601
10602
10603#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
10604 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
10605
10606/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
10607DECL_INLINE_THROW(uint32_t)
10608iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10609{
10610 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10611 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10612
10613 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10614 kIemNativeGstSimdRegLdStSz_256,
10615 kIemNativeGstRegUse_ForFullWrite);
10616
10617 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10618
10619 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10620
10621 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10622 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10623
10624 return off;
10625}
10626
10627
10628#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10629 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10630
10631/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
10632DECL_INLINE_THROW(uint32_t)
10633iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10634{
10635 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10636 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10637
10638 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10639 kIemNativeGstSimdRegLdStSz_256,
10640 kIemNativeGstRegUse_ForFullWrite);
10641
10642 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10643
10644 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10645
10646 /* Free but don't flush the destination register. */
10647 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10648 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10649
10650 return off;
10651}
10652
10653
10654#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10655 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10656
10657/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
10658DECL_INLINE_THROW(uint32_t)
10659iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10660{
10661 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10662 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10663
10664 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10665 kIemNativeGstSimdRegLdStSz_256,
10666 kIemNativeGstRegUse_ForFullWrite);
10667
10668 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10669
10670 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10671
10672 /* Free but don't flush the destination register. */
10673 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10674 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10675
10676 return off;
10677}
10678
10679
10680#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10681 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10682
10683/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
10684DECL_INLINE_THROW(uint32_t)
10685iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10686{
10687 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10688 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10689
10690 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10691 kIemNativeGstSimdRegLdStSz_256,
10692 kIemNativeGstRegUse_ForFullWrite);
10693
10694 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10695
10696 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
10697
10698 /* Free but don't flush the destination register. */
10699 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10700 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10701
10702 return off;
10703}
10704
10705
10706#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10707 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10708
10709/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
10710DECL_INLINE_THROW(uint32_t)
10711iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10712{
10713 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10714 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10715
10716 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10717 kIemNativeGstSimdRegLdStSz_256,
10718 kIemNativeGstRegUse_ForFullWrite);
10719
10720 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10721
10722 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10723 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
10724
10725 /* Free but don't flush the destination register. */
10726 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10727 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10728
10729 return off;
10730}
10731
10732
10733#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10734 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10735
10736/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
10737DECL_INLINE_THROW(uint32_t)
10738iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10739{
10740 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10741 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10742
10743 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10744 kIemNativeGstSimdRegLdStSz_256,
10745 kIemNativeGstRegUse_ForFullWrite);
10746
10747 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10748
10749 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10750 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10751
10752 /* Free but don't flush the destination register. */
10753 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10754 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10755
10756 return off;
10757}
10758
10759
10760#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
10761 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
10762
10763/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
10764DECL_INLINE_THROW(uint32_t)
10765iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
10766{
10767 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10768 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10769
10770 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10771 kIemNativeGstSimdRegLdStSz_256,
10772 kIemNativeGstRegUse_ForFullWrite);
10773 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10774 kIemNativeGstSimdRegLdStSz_Low128,
10775 kIemNativeGstRegUse_ReadOnly);
10776 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10777
10778 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10779 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10780 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10781
10782 /* Free but don't flush the source and destination registers. */
10783 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10784 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10785 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10786
10787 return off;
10788}
10789
10790
10791#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
10792 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
10793
10794/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
10795DECL_INLINE_THROW(uint32_t)
10796iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
10797{
10798 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10799 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10800
10801 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10802 kIemNativeGstSimdRegLdStSz_256,
10803 kIemNativeGstRegUse_ForFullWrite);
10804 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10805 kIemNativeGstSimdRegLdStSz_Low128,
10806 kIemNativeGstRegUse_ReadOnly);
10807 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10808
10809 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10810 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
10811 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10812
10813 /* Free but don't flush the source and destination registers. */
10814 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10815 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10816 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10817
10818 return off;
10819}
10820
10821
10822#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
10823 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
10824
10825
10826/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
10827DECL_INLINE_THROW(uint32_t)
10828iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
10829{
10830 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10831 kIemNativeGstSimdRegLdStSz_Low128,
10832 kIemNativeGstRegUse_ForUpdate);
10833
10834 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions (an illustrative sketch follows this function). */
10835 if (bImm8Mask & RT_BIT(0))
10836 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
10837 if (bImm8Mask & RT_BIT(1))
10838 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
10839 if (bImm8Mask & RT_BIT(2))
10840 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
10841 if (bImm8Mask & RT_BIT(3))
10842 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
10843
10844 /* Free but don't flush the destination register. */
10845 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10846
10847 return off;
10848}
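/*
 * Illustrative sketch for the @todo above (an assumption, not implemented
 * behaviour): when the mask selects all four dwords, the whole low 128 bits
 * could be zeroed with a single call to iemNativeEmitSimdZeroVecRegLowU128
 * (the helper already used by the ZX_U128 stores above) instead of four
 * separate element stores.
 */
#if 0
    if (bImm8Mask == 0xf)
        off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
    else
    {
        if (bImm8Mask & RT_BIT(0))
            off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
        /* ... remaining dword cases exactly as in the function above ... */
    }
#endif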
10849
10850
10851#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
10852 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
10853
10854#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
10855 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
10856
10857/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
10858DECL_INLINE_THROW(uint32_t)
10859iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
10860{
10861 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10862 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
10863
10864 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10865 kIemNativeGstSimdRegLdStSz_256,
10866 kIemNativeGstRegUse_ReadOnly);
10867 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10868
10869 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
10870
10871 /* Free but don't flush the source register. */
10872 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10873 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10874
10875 return off;
10876}
10877
10878
10879#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
10880 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
10881
10882#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
10883 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
10884
10885/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
10886DECL_INLINE_THROW(uint32_t)
10887iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
10888{
10889 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10890 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10891
10892 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10893 kIemNativeGstSimdRegLdStSz_256,
10894 kIemNativeGstRegUse_ForFullWrite);
10895 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10896
10897 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
10898
10899 /* Free but don't flush the destination register. */
10900 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10901 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10902
10903 return off;
10904}
10905
10906
10907#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
10908 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
10909
10910
10911/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
10912DECL_INLINE_THROW(uint32_t)
10913iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
10914 uint8_t idxSrcVar, uint8_t iDwSrc)
10915{
10916 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10917 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10918
10919 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10920 iDwDst < 4
10921 ? kIemNativeGstSimdRegLdStSz_Low128
10922 : kIemNativeGstSimdRegLdStSz_High128,
10923 kIemNativeGstRegUse_ForUpdate);
10924 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10925 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10926
10927 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
10928 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
10929
10930 /* Free but don't flush the destination and temporary registers. */
10931 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10932 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10933 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10934
10935 return off;
10936}
10937
10938
10939#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
10940 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
10941
10942
10943/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
10944DECL_INLINE_THROW(uint32_t)
10945iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
10946 uint8_t idxSrcVar, uint8_t iQwSrc)
10947{
10948 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10949 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10950
10951 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10952 iQwDst < 2
10953 ? kIemNativeGstSimdRegLdStSz_Low128
10954 : kIemNativeGstSimdRegLdStSz_High128,
10955 kIemNativeGstRegUse_ForUpdate);
10956 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10957 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10958
10959 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
10960 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
10961
10962 /* Free but don't flush the destination and temporary registers. */
10963 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10964 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10965 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10966
10967 return off;
10968}
10969
10970
10971#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
10972 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
10973
10974
10975/** Emits code for IEM_MC_STORE_YREG_U64. */
10976DECL_INLINE_THROW(uint32_t)
10977iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
10978{
10979 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10980 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10981
10982 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10983 iQwDst < 2
10984 ? kIemNativeGstSimdRegLdStSz_Low128
10985 : kIemNativeGstSimdRegLdStSz_High128,
10986 kIemNativeGstRegUse_ForUpdate);
10987
10988 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10989
10990 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
10991
10992    /* Free but don't flush the destination register, and release the source variable register. */
10993 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10994 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10995
10996 return off;
10997}
10998
10999
11000#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
11001 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
11002
11003/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
11004DECL_INLINE_THROW(uint32_t)
11005iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
11006{
11007 RT_NOREF(pReNative, iYReg);
11008 /** @todo Needs to be implemented when support for AVX-512 is added. */
11009 return off;
11010}
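/*
 * Note (assumption): IEM_MC_CLEAR_ZREG_256_UP is meant to zero bits 511:256 of the
 * guest ZMM register.  The recompiler currently only tracks the 256-bit YMM state,
 * so there is nothing to clear yet and the emitter above is intentionally a no-op
 * until AVX-512 register state is modelled.
 */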
11011
11012
11013
11014/*********************************************************************************************************************************
11015* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
11016*********************************************************************************************************************************/
11017
11018/**
11019 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
11020 */
11021DECL_INLINE_THROW(uint32_t)
11022iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs, uint8_t idxInstr)
11023{
11024    /* Grab the MXCSR register; it must not be call-volatile or we end up freeing it when setting up the call below. */
11025 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
11026 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
11027 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
11028
11029#if 0 /* This is not required right now as the called helper will set up the SSE/AVX state if it is an assembly one. */
11030 /*
11031 * Need to do the FPU preparation.
11032 */
11033 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
11034#endif
11035
11036 /*
11037 * Do all the call setup and cleanup.
11038 */
11039 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
11040 false /*fFlushPendingWrites*/);
11041
11042 /*
11043 * Load the MXCSR register into the first argument and mask out the current exception flags.
11044 */
11045 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
11046 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
11047
11048 /*
11049 * Make the call.
11050 */
11051 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
11052
11053 /*
11054     * The updated MXCSR is in the return register; update the exception status flags.
11055 *
11056 * The return register is marked allocated as a temporary because it is required for the
11057 * exception generation check below.
11058 */
11059 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG)));
11060 uint8_t const idxRegTmp = iemNativeRegMarkAllocated(pReNative, IEMNATIVE_CALL_RET_GREG, kIemNativeWhat_Tmp);
11061 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
11062
11063#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
11064    /* Write back the MXCSR register value (there is no delayed writeback for such registers at the moment). */
11065 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
11066#endif
11067
11068 /*
11069 * Make sure we don't have any outstanding guest register writes as we may
11070     * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
11071 */
11072 off = iemNativeRegFlushPendingWrites(pReNative, off);
11073
11074#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11075 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11076#else
11077 RT_NOREF(idxInstr);
11078#endif
11079
11080    /** @todo r=aeichner ANDN from BMI1 would save us a temporary and an additional instruction here, but I don't
11081     * want to assume the availability of that instruction at the moment. */
11082 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
11083
11084 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp2, idxRegTmp);
11085 /* tmp &= X86_MXCSR_XCPT_MASK */
11086 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
11087 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
11088 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
11089 /* tmp = ~tmp */
11090 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
11091 /* tmp &= mxcsr */
11092 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegTmp2);
11093 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
11094 kIemNativeLabelType_RaiseSseAvxFpRelated);
11095
11096 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
11097 iemNativeRegFreeTmp(pReNative, idxRegTmp);
11098 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
11099
11100 return off;
11101}
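/*
 * Illustrative note (not part of the generated code): the tail of the common worker
 * above performs the usual MXCSR unmasked-exception check.  The sketch below shows
 * the equivalent interpreted logic; the function and parameter names are hypothetical,
 * only the bit arithmetic mirrors what is actually emitted.
 */
#if 0 /* illustration only, never compiled */
static bool iemSseAvxMxCsrCheckSketch(uint32_t *pfMxcsrGuest, uint32_t fMxcsrOut)
{
    /* Merge the exception flags raised by the helper into the guest MXCSR value. */
    *pfMxcsrGuest |= fMxcsrOut;
    /* Take the RaiseSseAvxFpRelated exit if any flag is set whose mask bit is clear. */
    return RT_BOOL(  fMxcsrOut
                   & X86_MXCSR_XCPT_FLAGS
                   & ~((fMxcsrOut & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT));
}
#endif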
11102
11103
11104#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
11105 off = iemNativeEmitCallSseAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
11106
11107/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
11108DECL_INLINE_THROW(uint32_t)
11109iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
11110{
11111 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11112 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11113 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
11114}
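/*
 * Note (assumption): IEM_SSE_AIMPL_HIDDEN_ARGS covers the MXCSR value that the common
 * worker loads into the first call argument register, which is why the explicit a0..aN
 * operands of IEM_MC_CALL_SSE_AIMPL_XXX are asserted to start at argument slot
 * IEM_SSE_AIMPL_HIDDEN_ARGS rather than 0.  The same reasoning applies to
 * IEM_AVX_AIMPL_HIDDEN_ARGS for the AVX variants further down.
 */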
11115
11116
11117#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
11118 off = iemNativeEmitCallSseAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
11119
11120/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
11121DECL_INLINE_THROW(uint32_t)
11122iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
11123 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
11124{
11125 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11126 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11127 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11128 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
11129}
11130
11131
11132/*********************************************************************************************************************************
11133* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
11134*********************************************************************************************************************************/
11135
11136#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
11137 off = iemNativeEmitCallAvxAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
11138
11139/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
11140DECL_INLINE_THROW(uint32_t)
11141iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
11142{
11143 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11144 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11145 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
11146}
11147
11148
11149#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
11150 off = iemNativeEmitCallAvxAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
11151
11152/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
11153DECL_INLINE_THROW(uint32_t)
11154iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
11155 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
11156{
11157 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11158 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11159 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11160 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
11161}
11162
11163
11164#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
11165
11166
11167/*********************************************************************************************************************************
11168* Include instruction emitters. *
11169*********************************************************************************************************************************/
11170#include "target-x86/IEMAllN8veEmit-x86.h"
11171