VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@ 106296

Last change on this file since 106296 was 106196, checked in by vboxsync, 2 months ago

VMM/IEM: Added missing skipped eflags tracking in iemNativeEmitModifyEFlagsBit. More logging. bugref:10720

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 553.8 KB
1/* $Id: IEMAllN8veRecompFuncs.h 106196 2024-10-01 13:50:48Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62#include "target-x86/IEMAllN8veEmit-x86.h"
63
64
65/*
66 * Narrow down configs here to avoid wasting time on unused configs.
67 * Note! Same checks in IEMAllThrdRecompiler.cpp.
68 */
69
70#ifndef IEM_WITH_CODE_TLB
71# error The code TLB must be enabled for the recompiler.
72#endif
73
74#ifndef IEM_WITH_DATA_TLB
75# error The data TLB must be enabled for the recompiler.
76#endif
77
78#ifndef IEM_WITH_SETJMP
79# error The setjmp approach must be enabled for the recompiler.
80#endif
81
82#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
83# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
84#endif
85
86
87/*********************************************************************************************************************************
88* Code emitters for flushing pending guest register writes and sanity checks *
89*********************************************************************************************************************************/
90
91#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
92
93# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
94/**
95 * Updates IEMCPU::uPcUpdatingDebug.
96 */
97DECL_INLINE_THROW(uint32_t) iemNativeEmitPcDebugAdd(PIEMRECOMPILERSTATE pReNative, uint32_t off, int64_t offDisp, uint8_t cBits)
98{
99# ifdef RT_ARCH_AMD64
100 if (pReNative->Core.fDebugPcInitialized && cBits >= 32)
101 {
102 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
103 if ((int32_t)offDisp == offDisp || cBits != 64)
104 {
105 /* add [q]word [pVCpu->iem.s.uPcUpdatingDebug], imm32/imm8 */
106 if (cBits == 64)
107 pCodeBuf[off++] = X86_OP_REX_W;
108 pCodeBuf[off++] = (int8_t)offDisp == offDisp ? 0x83 : 0x81;
109 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
110 if ((int8_t)offDisp == offDisp)
111 pCodeBuf[off++] = (int8_t)offDisp;
112 else
113 {
114 *(int32_t *)&pCodeBuf[off] = (int32_t)offDisp;
115 off += sizeof(int32_t);
116 }
117 }
118 else
119 {
120 /* mov tmp0, imm64 */
121 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, offDisp);
122
123 /* add [pVCpu->iem.s.uPcUpdatingDebug], tmp0 */
124 if (cBits == 64)
125 pCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 >= 8 ? X86_OP_REX_R : 0);
126 else if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
127 pCodeBuf[off++] = X86_OP_REX_R;
128 pCodeBuf[off++] = 0x01;
129 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0 & 7,
130 RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
131 }
132 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
133 return off;
134 }
135# endif
136
137 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
138 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 12);
139
140 if (pReNative->Core.fDebugPcInitialized)
141 {
142 Log4(("uPcUpdatingDebug+=%ld cBits=%d off=%#x\n", offDisp, cBits, off));
143 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
144 }
145 else
146 {
147 Log4(("uPcUpdatingDebug=rip+%ld cBits=%d off=%#x\n", offDisp, cBits, off));
148 pReNative->Core.fDebugPcInitialized = true;
149 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
150 }
151
152 if (cBits == 64)
153 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxTmpReg, offDisp, IEMNATIVE_REG_FIXED_TMP0);
154 else
155 {
156 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxTmpReg, (int32_t)offDisp, IEMNATIVE_REG_FIXED_TMP0);
157 if (cBits == 16)
158 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, UINT16_MAX);
159 }
160
161 off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug),
162 IEMNATIVE_REG_FIXED_TMP0);
163
164 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
165 iemNativeRegFreeTmp(pReNative, idxTmpReg);
166 return off;
167}
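/* Note: this helper only maintains the debug mirror IEMCPU::uPcUpdatingDebug (seeding it
   from cpum.GstCtx.rip on first use, adding offDisp, and truncating to 16 bits when
   cBits == 16); the actual PC advance is still handled by the delayed offPc bookkeeping
   elsewhere in this file. */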
168
169
170# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
171DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
172{
173 /* Compare the shadow with the context value; they should match. */
174 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
175 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
176 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
177 return off;
178}
179# endif
180
181#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
182
183/**
184 * Flushes delayed write of a specific guest register.
185 *
186 * This must be called prior to calling CImpl functions and any other helpers that
187 * use the guest state (like those raising exceptions).
188 *
189 * This optimization has not yet been implemented. The first target would be
190 * RIP updates, since these are the most common ones.
191 */
192DECL_INLINE_THROW(uint32_t)
193iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
194{
195#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
196 /* If it ever becomes possible to reference the PC register, the writeback needs to be done here first. */
197#endif
198
199#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
200#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
201 if ( enmClass == kIemNativeGstRegRef_EFlags
202 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
203 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
204#else
205 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
206#endif
207
208 if ( enmClass == kIemNativeGstRegRef_Gpr
209 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
210 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
211#endif
212
213#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
214 if ( enmClass == kIemNativeGstRegRef_XReg
215 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
216 {
217 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
218 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
219 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
220
221 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
222 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
223 }
224#endif
225 RT_NOREF(pReNative, enmClass, idxReg);
226 return off;
227}
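/* Illustrative call (hypothetical caller, not from this file): an emitter that hands out a
   reference to a general purpose register would typically do
       off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
   before letting helper code touch that register in CPUMCTX. */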
228
229
230
231/*********************************************************************************************************************************
232* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
233*********************************************************************************************************************************/
234
235#undef IEM_MC_BEGIN /* unused */
236#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
237 { \
238 Assert(pReNative->Core.bmVars == 0); \
239 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
240 Assert(pReNative->Core.bmStack == 0); \
241 pReNative->fMc = (a_fMcFlags); \
242 pReNative->fCImpl = (a_fCImplFlags); \
243 pReNative->cArgsX = (a_cArgsIncludingHidden)
244
245/** We have to get to the end in recompilation mode, as otherwise we won't
246 * generate code for all the IEM_MC_IF_XXX branches. */
247#define IEM_MC_END() \
248 iemNativeVarFreeAll(pReNative); \
249 } return off
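/* Illustrative expansion (sketch only): an MC block in the generated recompiler functions is
   bracketed as
       IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, cArgsIncludingHidden);
       ...emitter statements updating 'off'...
       IEM_MC_END();
   i.e. an opening '{' with the sanity asserts above and a closing '} return off'. */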
250
251
252
253/*********************************************************************************************************************************
254* Liveness Stubs *
255*********************************************************************************************************************************/
256
257#define IEM_MC_LIVENESS_GREG_INPUT(a_iGReg) ((void)0)
258#define IEM_MC_LIVENESS_GREG_CLOBBER(a_iGReg) ((void)0)
259#define IEM_MC_LIVENESS_GREG_MODIFY(a_iGReg) ((void)0)
260
261#define IEM_MC_LIVENESS_MREG_INPUT(a_iMReg) ((void)0)
262#define IEM_MC_LIVENESS_MREG_CLOBBER(a_iMReg) ((void)0)
263#define IEM_MC_LIVENESS_MREG_MODIFY(a_iMReg) ((void)0)
264
265#define IEM_MC_LIVENESS_XREG_INPUT(a_iXReg) ((void)0)
266#define IEM_MC_LIVENESS_XREG_CLOBBER(a_iXReg) ((void)0)
267#define IEM_MC_LIVENESS_XREG_MODIFY(a_iXReg) ((void)0)
268
269#define IEM_MC_LIVENESS_MXCSR_INPUT() ((void)0)
270#define IEM_MC_LIVENESS_MXCSR_CLOBBER() ((void)0)
271#define IEM_MC_LIVENESS_MXCSR_MODIFY() ((void)0)
272
273
274/*********************************************************************************************************************************
275* Native Emitter Support. *
276*********************************************************************************************************************************/
277
278#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
279
280#define IEM_MC_NATIVE_ELSE() } else {
281
282#define IEM_MC_NATIVE_ENDIF() } ((void)0)
283
284
285#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
286 off = a_fnEmitter(pReNative, off)
287
288#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
289 off = a_fnEmitter(pReNative, off, (a0))
290
291#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
292 off = a_fnEmitter(pReNative, off, (a0), (a1))
293
294#define IEM_MC_NATIVE_EMIT_2_EX(a_fnEmitter, a0, a1) \
295 off = a_fnEmitter(pReNative, off, pCallEntry->idxInstr, (a0), (a1))
296
297#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
298 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
299
300#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
301 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
302
303#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
304 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
305
306#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
307 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
308
309#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
310 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
311
312#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
313 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
314
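/* Illustrative usage (hypothetical instruction body; iemNativeEmit_SomeOp is a placeholder):
       IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
           IEM_MC_NATIVE_EMIT_2(iemNativeEmit_SomeOp, idxVarDst, idxVarSrc);
       IEM_MC_NATIVE_ELSE()
           ...fall back to a helper call...
       IEM_MC_NATIVE_ENDIF();
   The IF condition is a compile-time constant check of the host architecture bits. */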
315
316#ifndef RT_ARCH_AMD64
317# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
318#else
319/** @note This is a naive approach that ASSUMES that the register isn't
320 * allocated, so it only works safely for the first allocation(s) in
321 * an MC block. */
322# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
323 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
324
325DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg,
326 uint32_t off, bool fAllocated);
327
328DECL_INLINE_THROW(uint32_t)
329iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
330{
331 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
332 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
333 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
334
335# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
336 /* Must flush the register if it holds pending writes. */
337 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
338 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
339 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
340# endif
341
342 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off, false /*fAllocated*/);
343 return off;
344}
345
346#endif /* RT_ARCH_AMD64 */
347
348
349
350/*********************************************************************************************************************************
351* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX)                                            *
352*********************************************************************************************************************************/
353
354#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
355 pReNative->fMc = 0; \
356 pReNative->fCImpl = (a_fFlags); \
357 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
358 a_cbInstr) /** @todo not used ... */
359
360
361#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
362 pReNative->fMc = 0; \
363 pReNative->fCImpl = (a_fFlags); \
364 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
365
366DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
367 uint8_t idxInstr, uint64_t a_fGstShwFlush,
368 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
369{
370 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
371}
372
373
374#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
375 pReNative->fMc = 0; \
376 pReNative->fCImpl = (a_fFlags); \
377 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
378 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
379
380DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
381 uint8_t idxInstr, uint64_t a_fGstShwFlush,
382 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
383{
384 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
385}
386
387
388#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
389 pReNative->fMc = 0; \
390 pReNative->fCImpl = (a_fFlags); \
391 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
392 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
393
394DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
395 uint8_t idxInstr, uint64_t a_fGstShwFlush,
396 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
397 uint64_t uArg2)
398{
399 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
400}
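/* All three wrappers above funnel into iemNativeEmitCImplCall(), passing the actual argument
   count (1, 2 or 3) and zeroes for the unused argument slots. */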
401
402
403
404/*********************************************************************************************************************************
405* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
406*********************************************************************************************************************************/
407
408/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
409 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
410DECL_INLINE_THROW(uint32_t)
411iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
412{
413 /*
414 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
415 * return with special status code and make the execution loop deal with
416 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
417 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
418 * could continue w/o interruption, it probably will drop into the
419 * debugger, so it's not worth the effort of trying to service it here; we
420 * just lump it in with the handling of the others.
421 *
422 * To simplify the code and the register state management even more (wrt
423 * immediate in the AND operation), we always update the flags and skip the
424 * extra check and its associated conditional jump.
425 */
426 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
427 <= UINT32_MAX);
428#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
429 AssertMsg( pReNative->idxCurCall == 0
430 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
431 IEMLIVENESSBIT_IDX_EFL_OTHER)),
432 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
433 IEMLIVENESSBIT_IDX_EFL_OTHER)));
434#endif
435
436 /*
437 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
438 * any pending register writes must be flushed.
439 */
440 off = iemNativeRegFlushPendingWrites(pReNative, off);
441
442 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ForUpdate,
443 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER),
444 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER));
445 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_ReturnWithFlags>(pReNative, off, idxEflReg,
446 X86_EFL_TF
447 | CPUMCTX_DBG_HIT_DRX_MASK
448 | CPUMCTX_DBG_DBGF_MASK);
449 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
450 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
451
452 /* Free but don't flush the EFLAGS register. */
453 iemNativeRegFreeTmp(pReNative, idxEflReg);
454
455 return off;
456}
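/* In short, the code generated above tests EFLAGS for X86_EFL_TF and the DRX/DBGF debug bits
   and exits the TB via the ReturnWithFlags path if any are set, then clears X86_EFL_RF and
   the interrupt inhibit shadow and stores the result back to cpum.GstCtx.eflags. */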
457
458
459/** Helper for iemNativeEmitFinishInstructionWithStatus. */
460DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
461{
462 unsigned const offOpcodes = pCallEntry->offOpcode;
463 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
464 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
465 {
466 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
467 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
468 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
469 }
470 AssertFailedReturn(NIL_RTGCPHYS);
471}
472
473
474/** The VINF_SUCCESS dummy. */
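/* Note: when a_rcNormal is VINF_SUCCESS this template emits nothing extra; otherwise it
   records the instruction number (when instruction counting is enabled), flushes pending
   register writes and exits the TB through one of the ReturnBreakViaLookup* paths, with the
   next GCPhysPc preloaded when the following instruction is known to be on the same page. */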
475template<int const a_rcNormal, bool const a_fIsJump>
476DECL_FORCE_INLINE_THROW(uint32_t)
477iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
478 int32_t const offJump)
479{
480 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
481 if (a_rcNormal != VINF_SUCCESS)
482 {
483#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
484 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
485#else
486 RT_NOREF_PV(pCallEntry);
487#endif
488
489 /* As this code returns from the TB, any pending register writes must be flushed. */
490 off = iemNativeRegFlushPendingWrites(pReNative, off);
491
492 /*
493 * If we're in a conditional, mark the current branch as exiting so we
494 * can disregard its state when we hit the IEM_MC_ENDIF.
495 */
496 iemNativeMarkCurCondBranchAsExiting(pReNative);
497
498 /*
499 * Use the lookup table for getting to the next TB quickly.
500 * Note! In this code path there can only be one entry at present.
501 */
502 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
503 PCIEMTB const pTbOrg = pReNative->pTbOrg;
504 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
505 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
506
507#if 0
508 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
509 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
510 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
511 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
512 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
513
514 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
515
516#else
517 /* Load the index as argument #1 for the helper call at the given label. */
518 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
519
520 /*
521 * Figure out the physical address of the current instruction and see
522 * whether the next instruction we're about to execute is in the same
523 * page, so we can optimistically skip TLB loading.
524 *
525 * - This is safe for all cases in FLAT mode.
526 * - In segmented modes it is complicated, given that a negative
527 *   jump may underflow EIP and a forward jump may overflow or run into
528 *   CS.LIM, triggering a #GP. The only thing we can get away with
529 * now at compile time is forward jumps w/o CS.LIM checks, since the
530 * lack of CS.LIM checks means we're good for the entire physical page
531 * we're executing on and another 15 bytes before we run into CS.LIM.
532 */
533 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
534# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
535 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
536# endif
537 )
538 {
539 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
540 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
541 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
542 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
543
544 {
545 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
546 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
547
548 /* Load the key lookup flags into the 2nd argument for the helper call.
549 - This is safe wrt CS limit checking since we're only here for FLAT modes.
550 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
551 interrupt shadow.
552 - The NMI inhibiting is more questionable, though... */
553 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
554 * Should we copy it into fExec to simplify this? OTOH, it's just a
555 * couple of extra instructions if EFLAGS are already in a register. */
556 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
557 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
558
559 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
560 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookup>(pReNative, off);
561 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithIrq>(pReNative, off);
562 }
563 }
564 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
565 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithTlb>(pReNative, off);
566 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq>(pReNative, off);
567#endif
568 }
569 return off;
570}
571
572
573#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
574 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
575 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
576
577#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
578 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
579 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
580 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
581
582/** Same as iemRegAddToRip64AndFinishingNoFlags. */
583DECL_INLINE_THROW(uint32_t)
584iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
585{
586#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
587# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
588 if (!pReNative->Core.offPc)
589 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
590# endif
591
592 /* Allocate a temporary PC register. */
593 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
594
595 /* Perform the addition and store the result. */
596 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
597 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
598
599 /* Free but don't flush the PC register. */
600 iemNativeRegFreeTmp(pReNative, idxPcReg);
601#endif
602
603#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
604 pReNative->Core.offPc += cbInstr;
605 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
606# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
607 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 64);
608 off = iemNativeEmitPcDebugCheck(pReNative, off);
609# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
610 off = iemNativePcAdjustCheck(pReNative, off);
611# endif
612 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
613#endif
614
615 return off;
616}
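/* Note: with IEMNATIVE_WITH_DELAYED_PC_UPDATING (and no IEMNATIVE_REG_FIXED_PC_DBG) the
   add+store of RIP above is compiled out and the advance is merely accumulated in
   pReNative->Core.offPc, to be flushed by a later emitter; the debug/check emitters only
   verify that bookkeeping. */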
617
618
619#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
620 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
621 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
622
623#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
624 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
625 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
626 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
627
628/** Same as iemRegAddToEip32AndFinishingNoFlags. */
629DECL_INLINE_THROW(uint32_t)
630iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
631{
632#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
633# ifdef IEMNATIVE_REG_FIXED_PC_DBG
634 if (!pReNative->Core.offPc)
635 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
636# endif
637
638 /* Allocate a temporary PC register. */
639 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
640
641 /* Perform the addition and store the result. */
642 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
643 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
644
645 /* Free but don't flush the PC register. */
646 iemNativeRegFreeTmp(pReNative, idxPcReg);
647#endif
648
649#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
650 pReNative->Core.offPc += cbInstr;
651 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
652# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
653 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 32);
654 off = iemNativeEmitPcDebugCheck(pReNative, off);
655# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
656 off = iemNativePcAdjustCheck(pReNative, off);
657# endif
658 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
659#endif
660
661 return off;
662}
663
664
665#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
666 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
667 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
668
669#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
670 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
671 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
672 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
673
674/** Same as iemRegAddToIp16AndFinishingNoFlags. */
675DECL_INLINE_THROW(uint32_t)
676iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
677{
678#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
679# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
680 if (!pReNative->Core.offPc)
681 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
682# endif
683
684 /* Allocate a temporary PC register. */
685 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
686
687 /* Perform the addition and store the result. */
688 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
689 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
690 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
691
692 /* Free but don't flush the PC register. */
693 iemNativeRegFreeTmp(pReNative, idxPcReg);
694#endif
695
696#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
697 pReNative->Core.offPc += cbInstr;
698 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
699# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
700 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 16);
701 off = iemNativeEmitPcDebugCheck(pReNative, off);
702# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
703 off = iemNativePcAdjustCheck(pReNative, off);
704# endif
705 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
706#endif
707
708 return off;
709}
710
711
712/*********************************************************************************************************************************
713* Common code for changing PC/RIP/EIP/IP. *
714*********************************************************************************************************************************/
715
716/**
717 * Emits code to check if the content of @a idxAddrReg is a canonical address,
718 * raising a \#GP(0) if it isn't.
719 *
720 * @returns New code buffer offset, UINT32_MAX on failure.
721 * @param pReNative The native recompile state.
722 * @param off The code buffer offset.
723 * @param idxAddrReg The host register with the address to check.
724 * @param idxInstr The current instruction.
725 */
726DECL_FORCE_INLINE_THROW(uint32_t)
727iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
728{
729 /*
730 * Make sure we don't have any outstanding guest register writes as we may
731 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
732 */
733 off = iemNativeRegFlushPendingWrites(pReNative, off);
734
735#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
736 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
737#else
738 RT_NOREF(idxInstr);
739#endif
740
741#ifdef RT_ARCH_AMD64
742 /*
743 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
744 * return raisexcpt();
745 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
746 */
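/* For illustration (values assumed, not from the source): 0x0000800000000000 is non-canonical,
   giving (0x00008000 + 0x8000) >> 16 = 1 and thus #GP(0), while for the canonical
   0xffff800000000000 the 32-bit addition wraps to zero and the check passes. */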
747 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
748
749 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
750 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
751 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
752 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
753 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
754
755 iemNativeRegFreeTmp(pReNative, iTmpReg);
756
757#elif defined(RT_ARCH_ARM64)
758 /*
759 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
760 * return raisexcpt();
761 * ----
762 * mov x1, 0x800000000000
763 * add x1, x0, x1
764 * cmp xzr, x1, lsr 48
765 * b.ne .Lraisexcpt
766 */
767 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
768
769 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
770 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
771 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
772 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
773
774 iemNativeRegFreeTmp(pReNative, iTmpReg);
775
776#else
777# error "Port me"
778#endif
779 return off;
780}
781
782
783/**
784 * Emits code to check if the content of @a idxAddrReg is a canonical address,
785 * raising a \#GP(0) if it isn't.
786 *
787 * Caller makes sure everything is flushed, except maybe PC.
788 *
789 * @returns New code buffer offset, UINT32_MAX on failure.
790 * @param pReNative The native recompile state.
791 * @param off The code buffer offset.
792 * @param idxAddrReg The host register with the address to check.
793 * @param offDisp The relative displacement that has already been
794 * added to idxAddrReg and must be subtracted if
795 * raising a \#GP(0).
796 * @param idxInstr The current instruction.
797 */
798DECL_FORCE_INLINE_THROW(uint32_t)
799iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
800 uint8_t idxAddrReg, int64_t offDisp, uint8_t idxInstr)
801{
802#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
803 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
804#endif
805
806#ifdef RT_ARCH_AMD64
807 /*
808 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
809 * return raisexcpt();
810 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
811 */
812 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
813
814 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
815 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
816 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
817 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
818
819#elif defined(RT_ARCH_ARM64)
820 /*
821 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
822 * return raisexcpt();
823 * ----
824 * mov x1, 0x800000000000
825 * add x1, x0, x1
826 * cmp xzr, x1, lsr 48
827 * b.ne .Lraisexcpt
828 */
829 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
830
831 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
832 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
833 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
834#else
835# error "Port me"
836#endif
837
838 /* Jump to the #GP code (hoping static prediction considers forward branches as not-taken). */
839 uint32_t const offFixup1 = off;
840 off = iemNativeEmitJnzToFixed(pReNative, off, off /*8-bit jump suffices*/);
841
842 /* jump .Lnoexcept; Skip the #GP code. */
843 uint32_t const offFixup2 = off;
844 off = iemNativeEmitJmpToFixed(pReNative, off, off /*8-bit jump suffices*/);
845
846 /* .Lraisexcpt: */
847 iemNativeFixupFixedJump(pReNative, offFixup1, off);
848#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
849 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
850#else
851 RT_NOREF(idxInstr);
852#endif
853
854 /* Undo the PC adjustment and store the old PC value. */
855 off = iemNativeEmitSubGprImm(pReNative, off, idxAddrReg, offDisp, iTmpReg);
856 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxAddrReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
857
858 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
859
860 /* .Lnoexcept: */
861 iemNativeFixupFixedJump(pReNative, offFixup2, off);
862
863 iemNativeRegFreeTmp(pReNative, iTmpReg);
864 return off;
865}
866
867
868/**
869 * Emits code to check if the content of @a idxAddrReg is a canonical address,
870 * raising a \#GP(0) if it isn't.
871 *
872 * Caller makes sure everything is flushed, except maybe PC.
873 *
874 * @returns New code buffer offset, UINT32_MAX on failure.
875 * @param pReNative The native recompile state.
876 * @param off The code buffer offset.
877 * @param idxAddrReg The host register with the address to check.
878 * @param idxOldPcReg Register holding the old PC that offPc is relative
879 * to if available, otherwise UINT8_MAX.
880 * @param idxInstr The current instruction.
881 */
882DECL_FORCE_INLINE_THROW(uint32_t)
883iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
884 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
885{
886#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
887 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
888#endif
889
890#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
891# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
892 if (!pReNative->Core.offPc)
893# endif
894 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
895#else
896 RT_NOREF(idxInstr);
897#endif
898
899#ifdef RT_ARCH_AMD64
900 /*
901 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
902 * return raisexcpt();
903 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
904 */
905 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
906
907 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
908 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
909 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
910 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
911
912#elif defined(RT_ARCH_ARM64)
913 /*
914 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
915 * return raisexcpt();
916 * ----
917 * mov x1, 0x800000000000
918 * add x1, x0, x1
919 * cmp xzr, x1, lsr 48
920 * b.ne .Lraisexcpt
921 */
922 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
923
924 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
925 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
926 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
927#else
928# error "Port me"
929#endif
930
931#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
932 if (pReNative->Core.offPc)
933 {
934 /** @todo On x86, it is said that conditional jumps forward are statically
935 *    predicted as not taken, so this isn't a very good construct.
936 * Investigate whether it makes sense to invert it and add another
937 * jump. Also, find out wtf the static predictor does here on arm! */
938 uint32_t const offFixup = off;
939 off = iemNativeEmitJzToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
940
941 /* .Lraisexcpt: */
942# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
943 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
944# endif
945 /* We need to update cpum.GstCtx.rip. */
946 if (idxOldPcReg == UINT8_MAX)
947 {
948 idxOldPcReg = iTmpReg;
949 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
950 }
951 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
952 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
953
954 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
955 iemNativeFixupFixedJump(pReNative, offFixup, off);
956 }
957 else
958#endif
959 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
960
961 iemNativeRegFreeTmp(pReNative, iTmpReg);
962
963 return off;
964}
965
966
967/**
968 * Emits code to check that the content of @a idxAddrReg is within the limit
969 * of CS, raising a \#GP(0) if it isn't.
970 *
971 * @returns New code buffer offset; throws VBox status code on error.
972 * @param pReNative The native recompile state.
973 * @param off The code buffer offset.
974 * @param idxAddrReg The host register (32-bit) with the address to
975 * check.
976 * @param idxInstr The current instruction.
977 */
978DECL_FORCE_INLINE_THROW(uint32_t)
979iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
980 uint8_t idxAddrReg, uint8_t idxInstr)
981{
982 /*
983 * Make sure we don't have any outstanding guest register writes as we may
984 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
985 */
986 off = iemNativeRegFlushPendingWrites(pReNative, off);
987
988#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
989 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
990#else
991 RT_NOREF(idxInstr);
992#endif
993
994 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
995 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
996 kIemNativeGstRegUse_ReadOnly);
997
998 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
999 off = iemNativeEmitTbExitJa<kIemNativeLabelType_RaiseGp0>(pReNative, off);
1000
1001 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1002 return off;
1003}
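/* Note: the limit check above is a single unsigned 32-bit compare of the address against the
   cached CS limit, with a jump-if-above TB exit to the RaiseGp0 path. */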
1004
1005
1006
1007
1008/**
1009 * Emits code to check that the content of @a idxAddrReg is within the limit
1010 * of CS, raising a \#GP(0) if it isn't.
1011 *
1012 * Caller makes sure everything is flushed, except maybe PC.
1013 *
1014 * @returns New code buffer offset; throws VBox status code on error.
1015 * @param pReNative The native recompile state.
1016 * @param off The code buffer offset.
1017 * @param idxAddrReg The host register (32-bit) with the address to
1018 * check.
1019 * @param idxOldPcReg Register holding the old PC that offPc is relative
1020 * to if available, otherwise UINT8_MAX.
1021 * @param idxInstr The current instruction.
1022 */
1023DECL_FORCE_INLINE_THROW(uint32_t)
1024iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1025 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
1026{
1027#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1028 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1029#endif
1030
1031#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1032# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1033 if (!pReNative->Core.offPc)
1034# endif
1035 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1036#else
1037 RT_NOREF(idxInstr);
1038#endif
1039
1040 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
1041 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
1042 kIemNativeGstRegUse_ReadOnly);
1043
1044 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
1045#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1046 if (pReNative->Core.offPc)
1047 {
1048 uint32_t const offFixup = off;
1049 off = iemNativeEmitJbeToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
1050
1051 /* Raising a GP(0), but first we need to update cpum.GstCtx.rip. */
1052 if (idxOldPcReg == UINT8_MAX)
1053 {
1054 idxOldPcReg = idxAddrReg;
1055 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1056 }
1057 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
1058 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1059# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1060 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1061# endif
1062 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
1063 iemNativeFixupFixedJump(pReNative, offFixup, off);
1064 }
1065 else
1066#endif
1067 off = iemNativeEmitTbExitJa<kIemNativeLabelType_RaiseGp0>(pReNative, off);
1068
1069 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1070 return off;
1071}
1072
1073
1074/*********************************************************************************************************************************
1075* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
1076*********************************************************************************************************************************/
1077
1078#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1079 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1080 (a_enmEffOpSize), pCallEntry->idxInstr); \
1081 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1082
1083#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1084 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1085 (a_enmEffOpSize), pCallEntry->idxInstr); \
1086 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1087 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1088
1089#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
1090 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1091 IEMMODE_16BIT, pCallEntry->idxInstr); \
1092 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1093
1094#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1095 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1096 IEMMODE_16BIT, pCallEntry->idxInstr); \
1097 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1098 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1099
1100#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
1101 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1102 IEMMODE_64BIT, pCallEntry->idxInstr); \
1103 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1104
1105#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1106 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1107 IEMMODE_64BIT, pCallEntry->idxInstr); \
1108 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1109 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1110
1111
1112#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1113 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1114 (a_enmEffOpSize), pCallEntry->idxInstr); \
1115 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1116
1117#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1118 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1119 (a_enmEffOpSize), pCallEntry->idxInstr); \
1120 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1121 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1122
1123#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG(a_i16, a_cbInstr, a_rcNormal) \
1124 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1125 IEMMODE_16BIT, pCallEntry->idxInstr); \
1126 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1127
1128#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1129 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1130 IEMMODE_16BIT, pCallEntry->idxInstr); \
1131 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1132 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1133
1134#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG(a_i32, a_cbInstr, a_rcNormal) \
1135 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1136 IEMMODE_64BIT, pCallEntry->idxInstr); \
1137 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1138
1139#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1140 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1141 IEMMODE_64BIT, pCallEntry->idxInstr); \
1142 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1143 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1144
1145/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
1146 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
1147 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
1148template<bool const a_fWithinPage>
1149DECL_INLINE_THROW(uint32_t)
1150iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1151 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1152{
1153 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
1154#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1155 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1156 if (a_fWithinPage && enmEffOpSize == IEMMODE_64BIT)
1157 {
1158 /* No #GP checking required, just update offPc and get on with it. */
1159 pReNative->Core.offPc += (int64_t)offDisp + cbInstr;
1160# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1161 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1162# endif
1163 }
1164 else
1165#endif
1166 {
1167 /* Flush all but PC iff we're doing a 64-bit update here and this isn't within a page. */
1168 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT && !a_fWithinPage))
1169 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1170
1171 /* Allocate a temporary PC register. */
1172 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1173 kIemNativeGstRegUse_ForUpdate);
1174
1175 /* Perform the addition. */
1176 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr + pReNative->Core.offPc);
1177
1178 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
1179 {
1180 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't.
1181 We can skip this if the target is within the same page. */
1182 if (!a_fWithinPage)
1183 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(pReNative, off, idxPcReg,
1184 (int64_t)offDisp + cbInstr, idxInstr);
1185 }
1186 else
1187 {
1188 /* Just truncate the result to 16-bit IP. */
1189 Assert(enmEffOpSize == IEMMODE_16BIT);
1190 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1191 }
1192
1193#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1194# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1195 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1196 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1197# endif
1198 /* Since we've already got the new PC value in idxPcReg, we can just as
1199 well write it out and reset offPc to zero. Otherwise, we'd need to use
1200 a copy of the shadow PC, which would cost another move instruction here. */
1201# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS)
1202 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
1203 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
1204 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
1205 Log4(("iemNativeEmitRip64RelativeJumpAndFinishingNoFlags: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u cCondDepth=%d\n",
1206 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped, pReNative->cCondDepth));
1207 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
1208# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1209 iemNativeDbgInfoAddNativeOffset(pReNative, off);
1210 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
1211# endif
1212# endif
1213 pReNative->Core.offPc = 0;
1214#endif
1215
1216 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1217
1218 /* Free but don't flush the PC register. */
1219 iemNativeRegFreeTmp(pReNative, idxPcReg);
1220 }
1221 return off;
1222}
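/* Summary of the paths above: a 64-bit jump known to stay within the current page only bumps
   the delayed offPc counter; otherwise the displacement is applied to the PC register, which
   is then either checked for canonicality (64-bit operand size) or truncated to 16 bits
   (16-bit operand size) and written back to cpum.GstCtx.rip, resetting any pending offPc. */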
1223
1224
1225#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1226 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1227 (a_enmEffOpSize), pCallEntry->idxInstr); \
1228 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1229
1230#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1231 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1232 (a_enmEffOpSize), pCallEntry->idxInstr); \
1233 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1234 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1235
1236#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
1237 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1238 IEMMODE_16BIT, pCallEntry->idxInstr); \
1239 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1240
1241#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1242 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1243 IEMMODE_16BIT, pCallEntry->idxInstr); \
1244 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1245 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1246
1247#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
1248 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1249 IEMMODE_32BIT, pCallEntry->idxInstr); \
1250 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1251
1252#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1253 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1254 IEMMODE_32BIT, pCallEntry->idxInstr); \
1255 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1256 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1257
1258
1259#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1260 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1261 (a_enmEffOpSize), pCallEntry->idxInstr); \
1262 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1263
1264#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1265 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1266 (a_enmEffOpSize), pCallEntry->idxInstr); \
1267 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1268 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1269
1270#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT(a_i16, a_cbInstr, a_rcNormal) \
1271 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1272 IEMMODE_16BIT, pCallEntry->idxInstr); \
1273 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1274
1275#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1276 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1277 IEMMODE_16BIT, pCallEntry->idxInstr); \
1278 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1279 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1280
1281#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT(a_i32, a_cbInstr, a_rcNormal) \
1282 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1283 IEMMODE_32BIT, pCallEntry->idxInstr); \
1284 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1285
1286#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1287 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1288 IEMMODE_32BIT, pCallEntry->idxInstr); \
1289 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1290 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1291
1292/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
1293 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
1294 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
1295template<bool const a_fFlat>
1296DECL_INLINE_THROW(uint32_t)
1297iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1298 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1299{
1300 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
1301#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1302 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1303#endif
1304
1305 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1306 if (!a_fFlat || enmEffOpSize == IEMMODE_16BIT)
1307 {
1308 off = iemNativeRegFlushPendingWrites(pReNative, off);
1309#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1310 Assert(pReNative->Core.offPc == 0);
1311#endif
1312 }
1313
1314 /* Allocate a temporary PC register. */
1315 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1316
1317 /* Perform the addition. */
1318#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1319 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr + (int32_t)pReNative->Core.offPc);
1320#else
1321 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
1322#endif
1323
1324 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
1325 if (enmEffOpSize == IEMMODE_16BIT)
1326 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1327
1328 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
1329 if (!a_fFlat)
1330 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1331
1332 /* Commit it. */
1333#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1334 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, enmEffOpSize == IEMMODE_32BIT ? 32 : 16);
1335 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1336#endif
1337
1338 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1339#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1340 pReNative->Core.offPc = 0;
1341#endif
1342
1343 /* Free but don't flush the PC register. */
1344 iemNativeRegFreeTmp(pReNative, idxPcReg);
1345
1346 return off;
1347}
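/*
 * Worked example (explanatory comment only) for the 16-bit operand size path above: with
 * EIP=0x0000FFFB, cbInstr=3 and offDisp=+0x10 the addition yields 0x1000E, which
 * iemNativeEmitClear16UpGpr truncates to 0x000E, matching the IP wrap-around of a 16-bit
 * jump.  In non-flat modes the truncated result is then checked against CS.LIM and a
 * #GP(0) TB exit is taken if it exceeds the limit.
 */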
1348
1349
1350#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
1351 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1352 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1353
1354#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
1355 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1356 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1357 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1358
1359#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
1360 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1361 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1362
1363#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1364 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1365 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1366 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1367
1368#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
1369 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1370 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1371
1372#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1373 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1374 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1375 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1376
1377/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
1378DECL_INLINE_THROW(uint32_t)
1379iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1380 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
1381{
1382 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1383 off = iemNativeRegFlushPendingWrites(pReNative, off);
1384
1385#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1386 Assert(pReNative->Core.offPc == 0);
1387 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1388#endif
1389
1390 /* Allocate a temporary PC register. */
1391 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1392
1393 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
1394 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
1395 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1396 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1397#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1398 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, 16);
1399 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1400#endif
1401 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1402
1403 /* Free but don't flush the PC register. */
1404 iemNativeRegFreeTmp(pReNative, idxPcReg);
1405
1406 return off;
1407}
1408
1409
1410
1411/*********************************************************************************************************************************
1412* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
1413*********************************************************************************************************************************/
1414
1415/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
1416#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
1417 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1418
1419/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
1420#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
1421 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1422
1423/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
1424#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
1425 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1426
1427/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
1428 * clears flags. */
1429#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
1430 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
1431 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1432
1433/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
1434 * clears flags. */
1435#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
1436 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
1437 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1438
1439/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
1440 * clears flags. */
1441#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
1442 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
1443 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1444
1445#undef IEM_MC_SET_RIP_U16_AND_FINISH
1446
1447
1448/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
1449#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
1450 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1451
1452/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
1453#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
1454 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1455
1456/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
1457 * clears flags. */
1458#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
1459 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
1460 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1461
1462/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
1463 * and clears flags. */
1464#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
1465 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
1466 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1467
1468#undef IEM_MC_SET_RIP_U32_AND_FINISH
1469
1470
1471/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
1472#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
1473 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1474
1475/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
1476 * and clears flags. */
1477#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
1478 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
1479 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1480
1481#undef IEM_MC_SET_RIP_U64_AND_FINISH
1482
1483
1484/** Same as iemRegRipJumpU16AndFinishNoFlags,
1485 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
1486DECL_INLINE_THROW(uint32_t)
1487iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
1488 uint8_t idxInstr, uint8_t cbVar)
1489{
1490 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1491 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1492
1493 /* If we can't rule out a #GP(0) below, flush all dirty registers except for
1494 PC, which will be handled specially by the two workers below if they raise a #GP(0). */
1495 bool const fMayRaiseGp0 = (f64Bit && cbVar > sizeof(uint32_t)) || (!f64Bit && !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1496 uint8_t const idxOldPcReg = fMayRaiseGp0
1497 ? iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc)
1498 : UINT8_MAX;
1499 if (fMayRaiseGp0)
1500 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1501
1502 /* Get a register with the new PC loaded from idxVarPc.
1503 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1504 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
1505
1506 /* Check that the target is within CS.LIM / is canonical (may #GP(0) + exit TB). */
1507 if (fMayRaiseGp0)
1508 {
1509 if (f64Bit)
1510 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1511 else
1512 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1513 }
1514
1515 /* Store the result. */
1516 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1517
1518#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1519 pReNative->Core.offPc = 0;
1520 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1521# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1522 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1523 pReNative->Core.fDebugPcInitialized = true;
1524 Log4(("uPcUpdatingDebug=rip off=%#x\n", off));
1525# endif
1526#endif
1527
1528 if (idxOldPcReg != UINT8_MAX)
1529 iemNativeRegFreeTmp(pReNative, idxOldPcReg);
1530 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1531 /** @todo implicitly free the variable? */
1532
1533 return off;
1534}
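/*
 * Explanatory note (editorial, not from the original comments): idxOldPcReg is only looked
 * up when a #GP(0) is actually possible (non-canonical 64-bit target or a non-flat CS.LIM
 * check) and only if the PC already lives in a host register, and the flush deliberately
 * spares the PC shadow so the "WithOldPc" check helpers still have the pre-jump value at
 * hand if they have to raise the exception and exit the TB.
 */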
1535
1536
1537
1538/*********************************************************************************************************************************
1539* Emitters for changing PC/RIP/EIP/IP with an indirect call (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters). *
1540*********************************************************************************************************************************/
1541
1542/** @todo These helpers naturally belong to the stack push API, but we already need them up here (we could of course move
1543 * this below the stack emitters, but then it wouldn't be close to the rest of the PC/RIP handling...). */
1544DECL_FORCE_INLINE_THROW(uint32_t)
1545iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1546{
1547 /* Use16BitSp: */
1548#ifdef RT_ARCH_AMD64
1549 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1550 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1551#else
1552 /* sub regeff, regrsp, #cbMem */
1553 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
1554 /* and regeff, regeff, #0xffff */
1555 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1556 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
1557 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0. */
1558 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
1559#endif
1560 return off;
1561}
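/*
 * Worked example (explanatory comment only) for the 16-bit SP push helper above: with
 * SP=0x0001 and cbMem=2 the subtraction wraps to 0xFFFF, which becomes the effective
 * store offset within SS, and only bits 15:0 of RSP are updated (16-bit sub on AMD64,
 * bfi on ARM64), leaving bits 63:16 of the register untouched just like a real CPU with
 * a 16-bit stack segment.
 */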
1562
1563
1564DECL_FORCE_INLINE(uint32_t)
1565iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1566{
1567 /* Use32BitSp: */
1568 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
1569 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1570 return off;
1571}
1572
1573
1574DECL_INLINE_THROW(uint32_t)
1575iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
1576 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
1577{
1578 /*
1579 * Assert sanity.
1580 */
1581#ifdef VBOX_STRICT
1582 if (RT_BYTE2(cBitsVarAndFlat) != 0)
1583 {
1584 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
1585 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
1586 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
1587 Assert( pfnFunction
1588 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1589 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
1590 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1591 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
1592 : UINT64_C(0xc000b000a0009000) ));
1593 }
1594 else
1595 Assert( pfnFunction
1596 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
1597 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
1598 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
1599 : UINT64_C(0xc000b000a0009000) ));
1600#endif
1601
1602#ifdef VBOX_STRICT
1603 /*
1604 * Check that the fExec flags we've got make sense.
1605 */
1606 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1607#endif
1608
1609 /*
1610 * To keep things simple we have to commit any pending writes first as we
1611 * may end up making calls.
1612 */
1613 /** @todo we could postpone this till we make the call and reload the
1614 * registers after returning from the call. Not sure if that's sensible or
1615 * not, though. */
1616 off = iemNativeRegFlushPendingWrites(pReNative, off);
1617
1618 /*
1619 * First we calculate the new RSP and the effective stack pointer value.
1620 * For 64-bit mode and flat 32-bit these two are the same.
1621 * (Code structure is very similar to that of PUSH)
1622 */
1623 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
1624 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
1625 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
1626 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
1627 ? cbMem : sizeof(uint16_t);
1628 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
1629 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1630 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1631 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1632 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1633 if (cBitsFlat != 0)
1634 {
1635 Assert(idxRegEffSp == idxRegRsp);
1636 Assert(cBitsFlat == 32 || cBitsFlat == 64);
1637 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1638 if (cBitsFlat == 64)
1639 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1640 else
1641 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1642 }
1643 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1644 {
1645 Assert(idxRegEffSp != idxRegRsp);
1646 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1647 kIemNativeGstRegUse_ReadOnly);
1648#ifdef RT_ARCH_AMD64
1649 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1650#else
1651 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1652#endif
1653 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1654 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1655 offFixupJumpToUseOtherBitSp = off;
1656 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1657 {
1658 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1659 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1660 }
1661 else
1662 {
1663 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1664 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1665 }
1666 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1667 }
1668 /* SpUpdateEnd: */
1669 uint32_t const offLabelSpUpdateEnd = off;
1670
1671 /*
1672 * Okay, now prepare for the TLB lookup and jump to the lookup code (or straight to
1673 * TlbMiss if we're skipping the lookup).
1674 */
1675 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1676 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
1677 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1678 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1679 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1680 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1681 : UINT32_MAX;
1682 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1683
1684
1685 if (!TlbState.fSkip)
1686 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1687 else
1688 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1689
1690 /*
1691 * Use16BitSp:
1692 */
1693 if (cBitsFlat == 0)
1694 {
1695#ifdef RT_ARCH_AMD64
1696 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1697#else
1698 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1699#endif
1700 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1701 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1702 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1703 else
1704 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1705 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1706 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1707 }
1708
1709 /*
1710 * TlbMiss:
1711 *
1712 * Call helper to do the pushing.
1713 */
1714 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1715
1716#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1717 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1718#else
1719 RT_NOREF(idxInstr);
1720#endif
1721
1722 /* Save variables in volatile registers. */
1723 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1724 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1725 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1726 | (RT_BIT_32(idxRegPc));
1727 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1728
1729 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1730 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1731 {
1732 /* Swap them using ARG0 as temp register: */
1733 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1734 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1735 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1736 }
1737 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1738 {
1739 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1740 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1741
1742 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1743 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1744 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1745 }
1746 else
1747 {
1748 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1749 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1750
1751 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1752 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1753 }
1754
1755#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
1756 /* Do delayed EFLAGS calculations. */
1757 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
1758 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
1759#endif
1760
1761 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1762 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1763
1764 /* Done setting up parameters, make the call. */
1765 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
1766
1767 /* Restore variables and guest shadow registers to volatile registers. */
1768 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1769 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1770
1771#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1772 if (!TlbState.fSkip)
1773 {
1774 /* end of TlbMiss - Jump to the done label. */
1775 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1776 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1777
1778 /*
1779 * TlbLookup:
1780 */
1781 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
1782 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1783
1784 /*
1785 * Emit code to do the actual storing / fetching.
1786 */
1787 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1788# ifdef IEM_WITH_TLB_STATISTICS
1789 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1790 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1791# endif
1792 switch (cbMemAccess)
1793 {
1794 case 2:
1795 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1796 break;
1797 case 4:
1798 if (!fIsIntelSeg)
1799 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1800 else
1801 {
1802 /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
1803 PUSH FS in real mode, so we have to try to emulate that here.
1804 We borrow the now unused idxReg1 from the TLB lookup code here. */
1805 uint8_t const idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
1806 kIemNativeGstReg_EFlags);
1807 if (idxRegEfl != UINT8_MAX)
1808 {
1809#ifdef RT_ARCH_AMD64
1810 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
1811 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1812 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1813#else
1814 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
1815 off, TlbState.idxReg1, idxRegEfl,
1816 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1817#endif
1818 iemNativeRegFreeTmp(pReNative, idxRegEfl);
1819 }
1820 else
1821 {
1822 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
1823 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
1824 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1825 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1826 }
1827 /* ASSUMES the upper half of idxRegPc is ZERO. */
1828 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
1829 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
1830 }
1831 break;
1832 case 8:
1833 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1834 break;
1835 default:
1836 AssertFailed();
1837 }
1838
1839 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1840 TlbState.freeRegsAndReleaseVars(pReNative);
1841
1842 /*
1843 * TlbDone:
1844 *
1845 * Commit the new RSP value.
1846 */
1847 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1848 }
1849#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1850
1851#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1852 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1853#endif
1854 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1855 if (idxRegEffSp != idxRegRsp)
1856 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1857
1858 return off;
1859}
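/*
 * Rough control-flow sketch (explanatory comment only) of iemNativeEmitStackPushRip above:
 *
 *      compute new RSP and effective SP (16/32/64-bit stack handling)
 *        |- TlbLookup: translate SS:effSP inline and store the 2/4/8 byte value directly
 *        '- TlbMiss:   save volatile registers, call the iemNativeHlpStackStoreUxx /
 *                      FlatStoreUxx helper, restore registers and guest shadows
 *      TlbDone: commit the new RSP (unless delayed register writeback takes care of it)
 *
 * Note that the Intel real-mode segment-push quirk handling in the TLB-hit path appears to
 * be inherited from the generic push emitter; the callers in this file only push RIP values
 * (RT_BYTE3 of cBitsVarAndFlat is always zero), so the fIsSegReg path is not taken here.
 */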
1860
1861
1862/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1863#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1864 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1865
1866/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1867 * clears flags. */
1868#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1869 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1870 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1871
1872/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1873#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1874 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1875
1876/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1877 * clears flags. */
1878#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1879 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1880 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1881
1882#undef IEM_MC_IND_CALL_U16_AND_FINISH
1883
1884
1885/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1886#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1887 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1888
1889/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1890 * clears flags. */
1891#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1892 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1893 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1894
1895#undef IEM_MC_IND_CALL_U32_AND_FINISH
1896
1897
1898/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1899 * an extra parameter, for use in 64-bit code. */
1900#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1901 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1902
1903
1904/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1905 * an extra parameter, for use in 64-bit code and we need to check and clear
1906 * flags. */
1907#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1908 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1909 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1910
1911#undef IEM_MC_IND_CALL_U64_AND_FINISH
1912
1913/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1914 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1915DECL_INLINE_THROW(uint32_t)
1916iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1917 uint8_t idxInstr, uint8_t cbVar)
1918{
1919 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1920 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1921
1922 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1923 off = iemNativeRegFlushPendingWrites(pReNative, off);
1924
1925#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1926 Assert(pReNative->Core.offPc == 0);
1927 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1928#endif
1929
1930 /* Get a register with the new PC loaded from idxVarPc.
1931 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1932 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1933
1934 /* Check limit (may #GP(0) + exit TB). */
1935 if (!f64Bit)
1936/** @todo we can skip this test in FLAT 32-bit mode. */
1937 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1938 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1939 else if (cbVar > sizeof(uint32_t))
1940 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1941
1942#if 1
1943 /* Allocate a temporary PC register, we don't want it shadowed. */
1944 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1945 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1946#else
1947 /* Allocate a temporary PC register. */
1948 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1949 true /*fNoVolatileRegs*/);
1950#endif
1951
1952 /* Perform the addition and push the variable to the guest stack. */
1953 /** @todo Flat variants for PC32 variants. */
1954 switch (cbVar)
1955 {
1956 case sizeof(uint16_t):
1957 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1958 /* Truncate the result to 16-bit IP. */
1959 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1960 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1961 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1962 break;
1963 case sizeof(uint32_t):
1964 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1965 /** @todo In FLAT mode we can use the flat variant. */
1966 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1967 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1968 break;
1969 case sizeof(uint64_t):
1970 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1971 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1972 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1973 break;
1974 default:
1975 AssertFailed();
1976 }
1977
1978 /* RSP got changed, so do this again. */
1979 off = iemNativeRegFlushPendingWrites(pReNative, off);
1980
1981 /* Store the result. */
1982 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1983#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1984 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1985 pReNative->Core.fDebugPcInitialized = true;
1986 Log4(("uPcUpdatingDebug=rip/indirect-call off=%#x\n", off));
1987#endif
1988
1989#if 1
1990 /* Need to transfer the shadow information to the new RIP register. */
1991 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1992#else
1993 /* Sync the new PC. */
1994 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1995#endif
1996 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1997 iemNativeRegFreeTmp(pReNative, idxPcReg);
1998 /** @todo implicitly free the variable? */
1999
2000 return off;
2001}
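/*
 * Ordering note (explanatory comment): the indirect-call emitter above validates the new
 * target first (CS.LIM check for !f64Bit, canonical check for 64-bit targets wider than
 * 32 bits), then advances the old PC to form the return address and pushes it via
 * iemNativeEmitStackPushRip, and only afterwards commits the target as the new RIP.  The
 * second iemNativeRegFlushPendingWrites is needed because the stack push dirtied RSP, and
 * the PC shadow is finally transferred to the register holding the call target.
 */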
2002
2003
2004/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2005 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
2006#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
2007 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
2008
2009/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2010 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
2011 * flags. */
2012#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
2013 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
2014 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2015
2016/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2017 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2018#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
2019 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
2020
2021/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2022 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2023 * flags. */
2024#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
2025 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
2026 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2027
2028/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2029 * an extra parameter, for use in 64-bit code. */
2030#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
2031 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
2032
2033/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2034 * an extra parameter, for use in 64-bit code and we need to check and clear
2035 * flags. */
2036#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
2037 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
2038 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2039
2040#undef IEM_MC_REL_CALL_S16_AND_FINISH
2041
2042/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2043 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2044DECL_INLINE_THROW(uint32_t)
2045iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
2046 uint8_t idxInstr)
2047{
2048 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2049 off = iemNativeRegFlushPendingWrites(pReNative, off);
2050
2051#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2052 Assert(pReNative->Core.offPc == 0);
2053 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2054#endif
2055
2056 /* Allocate a temporary PC register. */
2057 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2058 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
2059 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
2060
2061 /* Calculate the new RIP. */
2062 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2063 /* Truncate the result to 16-bit IP. */
2064 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
2065 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2066 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2067
2068 /* Truncate the result to 16-bit IP. */
2069 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
2070
2071 /* Check limit (may #GP(0) + exit TB). */
2072 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2073
2074 /* Perform the addition and push the variable to the guest stack. */
2075 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
2076 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
2077
2078 /* RSP got changed, so flush again. */
2079 off = iemNativeRegFlushPendingWrites(pReNative, off);
2080
2081 /* Store the result. */
2082 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2083#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2084 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2085 pReNative->Core.fDebugPcInitialized = true;
2086 Log4(("uPcUpdatingDebug=rip/rel-call-16 off=%#x offDisp=%d\n", off, offDisp));
2087#endif
2088
2089 /* Need to transfer the shadow information to the new RIP register. */
2090 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2091 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2092 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2093
2094 return off;
2095}
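/*
 * Note (explanatory comment): the 16-bit relative call above truncates twice on purpose -
 * once for the return address (old IP + cbInstr) that gets pushed, and once for the branch
 * target (return address + offDisp) that is checked against CS.LIM and committed as the new
 * IP.  Both values must wrap at 64K independently.
 */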
2096
2097
2098/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2099 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2100#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
2101 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
2102
2103/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2104 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2105 * flags. */
2106#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
2107 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
2108 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2109
2110#undef IEM_MC_REL_CALL_S32_AND_FINISH
2111
2112/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2113 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2114DECL_INLINE_THROW(uint32_t)
2115iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
2116 uint8_t idxInstr)
2117{
2118 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2119 off = iemNativeRegFlushPendingWrites(pReNative, off);
2120
2121#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2122 Assert(pReNative->Core.offPc == 0);
2123 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2124#endif
2125
2126 /* Allocate a temporary PC register. */
2127 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2128 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2129 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
2130
2131 /* Update the EIP to get the return address. */
2132 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2133
2134 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2135 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2136 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2137 /** @todo we can skip this test in FLAT 32-bit mode. */
2138 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2139
2140 /* Push the return address onto the guest stack. */
2141 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
2142 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
2143 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
2144
2145 /* RSP got changed, so do this again. */
2146 off = iemNativeRegFlushPendingWrites(pReNative, off);
2147
2148 /* Store the result. */
2149 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2150#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2151 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2152 pReNative->Core.fDebugPcInitialized = true;
2153 Log4(("uPcUpdatingDebug=eip/rel-call-32 off=%#x offDisp=%d\n", off, offDisp));
2154#endif
2155
2156 /* Need to transfer the shadow information to the new RIP register. */
2157 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2158 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2159 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2160
2161 return off;
2162}
2163
2164
2165/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2166 * an extra parameter, for use in 64-bit code. */
2167#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
2168 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
2169
2170/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2171 * an extra parameter, for use in 64-bit code and we need to check and clear
2172 * flags. */
2173#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
2174 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
2175 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2176
2177#undef IEM_MC_REL_CALL_S64_AND_FINISH
2178
2179/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2180 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2181DECL_INLINE_THROW(uint32_t)
2182iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
2183 uint8_t idxInstr)
2184{
2185 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2186 off = iemNativeRegFlushPendingWrites(pReNative, off);
2187
2188#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2189 Assert(pReNative->Core.offPc == 0);
2190 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2191#endif
2192
2193 /* Allocate a temporary PC register. */
2194 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2195 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2196 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
2197
2198 /* Update the RIP to get the return address. */
2199 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
2200
2201 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2202 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
2203 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
2204 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2205
2206 /* Push the return address onto the guest stack. */
2207 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
2208 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
2209
2210 /* RSP got changed, so do this again. */
2211 off = iemNativeRegFlushPendingWrites(pReNative, off);
2212
2213 /* Store the result. */
2214 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2215#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2216 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2217 pReNative->Core.fDebugPcInitialized = true;
2218 Log4(("uPcUpdatingDebug=rip/rel-call-64 off=%#x offDisp=%RI64\n", off, offDisp));
2219#endif
2220
2221 /* Need to transfer the shadow information to the new RIP register. */
2222 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2223 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2224 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2225
2226 return off;
2227}
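/*
 * Worked example (explanatory comment only) for the canonical check in the 64-bit relative
 * call above: with RIP=0x00007ffffffffff0 and cbInstr=5 the return address is
 * 0x00007ffffffffff5; adding offDisp=+0x100 gives the target 0x00008000000000f5, which is
 * not canonical (bits 63:48 do not match bit 47), so iemNativeEmitCheckGprCanonicalMaybeRaiseGp0
 * raises #GP(0) and exits the TB before the return address is pushed.
 */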
2228
2229
2230/*********************************************************************************************************************************
2231* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters). *
2232*********************************************************************************************************************************/
2233
2234DECL_FORCE_INLINE_THROW(uint32_t)
2235iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2236 uint16_t cbPopAdd, uint8_t idxRegTmp)
2237{
2238 /* Use16BitSp: */
2239#ifdef RT_ARCH_AMD64
2240 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2241 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
2242 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
2243 RT_NOREF(idxRegTmp);
2244
2245#elif defined(RT_ARCH_ARM64)
2246 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
2247 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
2248 /* add tmp, regrsp, #(cbMem + cbPopAdd) */
2249 uint16_t const cbCombined = cbMem + cbPopAdd;
2250 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
2251 if (cbCombined >= RT_BIT_32(12))
2252 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
2253 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2254 /* and tmp, tmp, #0xffff */
2255 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2256 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
2257 /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
2258 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
2259
2260#else
2261# error "Port me"
2262#endif
2263 return off;
2264}
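/*
 * Worked example (explanatory comment only) for the 16-bit SP pop helper above: a `retn 4`
 * with SP=0xFFFC and cbMem=2 reads the return address at SS:0xFFFC, and the new SP becomes
 * (0xFFFC + 2 + 4) & 0xFFFF = 0x0002, i.e. the addition wraps within the low 16 bits while
 * bits 63:16 of RSP are preserved (16-bit adds on AMD64, the bfi on ARM64).
 */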
2265
2266
2267DECL_FORCE_INLINE_THROW(uint32_t)
2268iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2269 uint16_t cbPopAdd)
2270{
2271 /* Use32BitSp: */
2272 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2273 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
2274 return off;
2275}
2276
2277
2278/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
2279#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
2280 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
2281
2282/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
2283#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2284 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
2285
2286/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
2287#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2288 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
2289
2290/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
2291 * clears flags. */
2292#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
2293 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
2294 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2295
2296/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
2297 * clears flags. */
2298#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2299 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
2300 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2301
2302/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
2303 * clears flags. */
2304#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2305 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
2306 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2307
2308/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
2309DECL_INLINE_THROW(uint32_t)
2310iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
2311 IEMMODE enmEffOpSize, uint8_t idxInstr)
2312{
2313 RT_NOREF(cbInstr);
2314
2315#ifdef VBOX_STRICT
2316 /*
2317 * Check that the fExec flags we've got make sense.
2318 */
2319 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
2320#endif
2321
2322 /*
2323 * To keep things simple we have to commit any pending writes first as we
2324 * may end up making calls.
2325 */
2326 off = iemNativeRegFlushPendingWrites(pReNative, off);
2327
2328 /*
2329 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
2330 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
2331 * directly as the effective stack pointer.
2332 * (Code structure is very similar to that of PUSH)
2333 *
2334 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
2335 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
2336 * aren't commonly used (or useful) and thus not in need of optimizing.
2337 *
2338 * Note! For non-FLAT modes the guest RSP is not allocated for update but rather for calculation
2339 * as the shadowed register would remain modified even if the return address throws a \#GP(0)
2340 * due to being outside the CS limit causing a wrong stack pointer value in the guest (see
2341 * the near return testcase in bs3-cpu-basic-2). If no exception is thrown the shadowing is transfered
2342 * to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
2343 */
2344 uint8_t const cbMem = enmEffOpSize == IEMMODE_64BIT
2345 ? sizeof(uint64_t)
2346 : enmEffOpSize == IEMMODE_32BIT
2347 ? sizeof(uint32_t)
2348 : sizeof(uint16_t);
2349 bool const fFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
2350 uintptr_t const pfnFunction = fFlat
2351 ? enmEffOpSize == IEMMODE_64BIT
2352 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
2353 : (uintptr_t)iemNativeHlpStackFlatFetchU32
2354 : enmEffOpSize == IEMMODE_32BIT
2355 ? (uintptr_t)iemNativeHlpStackFetchU32
2356 : (uintptr_t)iemNativeHlpStackFetchU16;
2357 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
2358 fFlat ? kIemNativeGstRegUse_ForUpdate
2359 : kIemNativeGstRegUse_Calculation,
2360 true /*fNoVolatileRegs*/);
2361 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
2362 /** @todo can do a better job picking the register here. For cbMem >= 4 this
2363 * will be the resulting register value. */
2364 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
2365
2366 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
2367 if (fFlat)
2368 Assert(idxRegEffSp == idxRegRsp);
2369 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
2370 {
2371 Assert(idxRegEffSp != idxRegRsp);
2372 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
2373 kIemNativeGstRegUse_ReadOnly);
2374#ifdef RT_ARCH_AMD64
2375 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2376#else
2377 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2378#endif
2379 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
2380 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
2381 offFixupJumpToUseOtherBitSp = off;
2382 if (enmEffOpSize == IEMMODE_32BIT)
2383 {
2384 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
2385 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
2386 }
2387 else
2388 {
2389 Assert(enmEffOpSize == IEMMODE_16BIT);
2390 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
2391 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
2392 idxRegMemResult);
2393 }
2394 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2395 }
2396 /* SpUpdateEnd: */
2397 uint32_t const offLabelSpUpdateEnd = off;
2398
2399 /*
2400 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
2401 * we're skipping lookup).
2402 */
2403 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
2404 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
2405 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
2406 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
2407 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
2408 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
2409 : UINT32_MAX;
2410
2411 if (!TlbState.fSkip)
2412 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
2413 else
2414 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
2415
2416 /*
2417 * Use16BitSp:
2418 */
2419 if (!fFlat)
2420 {
2421#ifdef RT_ARCH_AMD64
2422 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2423#else
2424 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2425#endif
2426 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
2427 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
2428 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
2429 idxRegMemResult);
2430 else
2431 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
2432 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
2433 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2434 }
2435
2436 /*
2437 * TlbMiss:
2438 *
2439 * Call helper to do the pushing.
2440 */
2441 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
2442
2443#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2444 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2445#else
2446 RT_NOREF(idxInstr);
2447#endif
2448
2449 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
2450 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
2451 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
2452 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
2453
2454
2455 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
2456 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
2457 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
2458
2459#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2460 /* Do delayed EFLAGS calculations. */
2461 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
2462#endif
2463
2464 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
2465 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2466
2467 /* Done setting up parameters, make the call. */
2468 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
2469
2470 /* Move the return register content to idxRegMemResult. */
2471 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
2472 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
2473
2474 /* Restore variables and guest shadow registers to volatile registers. */
2475 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
2476 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
2477
2478#ifdef IEMNATIVE_WITH_TLB_LOOKUP
2479 if (!TlbState.fSkip)
2480 {
2481 /* end of TlbMiss - Jump to the done label. */
2482 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
2483 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
2484
2485 /*
2486 * TlbLookup:
2487 */
2488 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
2489 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
2490
2491 /*
2492 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
2493 */
2494 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2495# ifdef IEM_WITH_TLB_STATISTICS
2496 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
2497 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
2498# endif
2499 switch (cbMem)
2500 {
2501 case 2:
2502 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2503 break;
2504 case 4:
2505 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2506 break;
2507 case 8:
2508 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2509 break;
2510 default:
2511 AssertFailed();
2512 }
2513
2514 TlbState.freeRegsAndReleaseVars(pReNative);
2515
2516 /*
2517 * TlbDone:
2518 *
2519 * Set the new RSP value (FLAT accesses need to calculate it first) and
2520 * commit the popped register value.
2521 */
2522 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
2523 }
2524#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
2525
2526 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
2527 if (!f64Bit)
2528/** @todo we can skip this test in FLAT 32-bit mode. */
2529 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2530 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2531 else if (enmEffOpSize == IEMMODE_64BIT)
2532 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2533
2534 /* Complete RSP calculation for FLAT mode. */
2535 if (idxRegEffSp == idxRegRsp)
2536 {
2537 if (enmEffOpSize == IEMMODE_64BIT)
2538 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
2539 else
2540 {
2541 Assert(enmEffOpSize == IEMMODE_32BIT);
2542 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
2543 }
2544 }
2545
2546 /* Commit the result and clear any current guest shadows for RIP. */
2547 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
2548 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2549 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
2550#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2551 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2552 pReNative->Core.fDebugPcInitialized = true;
2553 Log4(("uPcUpdatingDebug=rip/ret off=%#x\n", off));
2554#endif
2555
2556 /* Need to transfer the shadowing information to the host register containing the updated value now. */
2557 if (!fFlat)
2558 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
2559
2560 iemNativeRegFreeTmp(pReNative, idxRegRsp);
2561 if (idxRegEffSp != idxRegRsp)
2562 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
2563 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
2564 return off;
2565}
2566
2567
2568/*********************************************************************************************************************************
2569* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
2570*********************************************************************************************************************************/
2571
2572#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
2573 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2574
2575/**
2576 * Emits code to check if a \#NM exception should be raised.
2577 *
2578 * @returns New code buffer offset, UINT32_MAX on failure.
2579 * @param pReNative The native recompile state.
2580 * @param off The code buffer offset.
2581 * @param idxInstr The current instruction.
2582 */
2583DECL_INLINE_THROW(uint32_t)
2584iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2585{
2586#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2587 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
2588
2589 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
2590 {
2591#endif
2592 /*
2593 * Make sure we don't have any outstanding guest register writes as we may
2594 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2595 */
2596 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2597 off = iemNativeRegFlushPendingWrites(pReNative, off);
2598
2599#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2600 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2601#else
2602 RT_NOREF(idxInstr);
2603#endif
2604
2605 /* Allocate a temporary CR0 register. */
2606 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2607 kIemNativeGstRegUse_ReadOnly);
2608
2609 /*
2610 * if (cr0 & (X86_CR0_EM | X86_CR0_TS) != 0)
2611 * return raisexcpt();
2612 */
2613 /* Test and jump. */
2614 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseNm>(pReNative, off, idxCr0Reg,
2615 X86_CR0_EM | X86_CR0_TS);
2616
2617 /* Free but don't flush the CR0 register. */
2618 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2619
2620#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2621 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
2622 }
2623 else
2624 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
2625#endif
2626
2627 return off;
2628}
2629
2630
2631#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
2632 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2633
2634/**
2635 * Emits code to check if a \#NM exception should be raised.
2636 *
2637 * @returns New code buffer offset, UINT32_MAX on failure.
2638 * @param pReNative The native recompile state.
2639 * @param off The code buffer offset.
2640 * @param idxInstr The current instruction.
2641 */
2642DECL_INLINE_THROW(uint32_t)
2643iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2644{
2645#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2646 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2647
2648 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2649 {
2650#endif
2651 /*
2652 * Make sure we don't have any outstanding guest register writes as we may
2653 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2654 */
2655 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2656 off = iemNativeRegFlushPendingWrites(pReNative, off);
2657
2658#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2659 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2660#else
2661 RT_NOREF(idxInstr);
2662#endif
2663
2664 /* Allocate a temporary CR0 register. */
2665 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2666 kIemNativeGstRegUse_Calculation);
2667
2668 /*
2669 * if (cr0 & (X86_CR0_MP | X86_CR0_TS) == (X86_CR0_MP | X86_CR0_TS))
2670 * return raisexcpt();
2671 */
2672 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2673 /* Test and jump. */
2674 off = iemNativeEmitTbExitIfGpr32EqualsImm<kIemNativeLabelType_RaiseNm>(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2675
2676 /* Free the CR0 register. */
2677 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2678
2679#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2680 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2681 }
2682 else
2683 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2684#endif
2685
2686 return off;
2687}
2688
2689
2690#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2691 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2692
2693/**
2694 * Emits code to check if a \#MF exception should be raised.
2695 *
2696 * @returns New code buffer offset, UINT32_MAX on failure.
2697 * @param pReNative The native recompile state.
2698 * @param off The code buffer offset.
2699 * @param idxInstr The current instruction.
2700 */
2701DECL_INLINE_THROW(uint32_t)
2702iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2703{
2704 /*
2705 * Make sure we don't have any outstanding guest register writes as we may
2706 * raise an #MF and all guest registers must be up to date in CPUMCTX.
2707 */
2708 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2709 off = iemNativeRegFlushPendingWrites(pReNative, off);
2710
2711#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2712 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2713#else
2714 RT_NOREF(idxInstr);
2715#endif
2716
2717 /* Allocate a temporary FSW register. */
2718 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2719 kIemNativeGstRegUse_ReadOnly);
2720
2721 /*
2722 * if (FSW & X86_FSW_ES != 0)
2723 * return raisexcpt();
2724 */
2725 /* Test and jump. */
2726 off = iemNativeEmitTbExitIfBitSetInGpr<kIemNativeLabelType_RaiseMf>(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT);
2727
2728 /* Free but don't flush the FSW register. */
2729 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2730
2731 return off;
2732}
2733
2734
2735#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2736 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2737
2738/**
2739 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2740 *
2741 * @returns New code buffer offset, UINT32_MAX on failure.
2742 * @param pReNative The native recompile state.
2743 * @param off The code buffer offset.
2744 * @param idxInstr The current instruction.
2745 */
2746DECL_INLINE_THROW(uint32_t)
2747iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2748{
2749#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2750 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2751
2752 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2753 {
2754#endif
2755 /*
2756 * Make sure we don't have any outstanding guest register writes as we may
2757 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2758 */
2759 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2760 off = iemNativeRegFlushPendingWrites(pReNative, off);
2761
2762#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2763 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2764#else
2765 RT_NOREF(idxInstr);
2766#endif
2767
2768 /* Allocate a temporary CR0 and CR4 register. */
2769 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2770 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2771 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2772
2773 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2774#ifdef RT_ARCH_AMD64
2775 /*
2776 * We do a modified test here:
2777 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
2778 * else { goto RaiseSseRelated; }
2779 * This ASSUMES that CR0[bit 9] is always zero. Since that is the case
2780 * on all targets except the 386, which doesn't support SSE anyway,
2781 * this should be a safe assumption.
2782 */
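    /* Illustrative example (hypothetical register values): with CR4.OSFXSR=1,
       CR0.EM=0 and CR0.TS=0 the temporary works out to
       ((CR4 & X86_CR4_OSFXSR) | CR0) & (X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR) = X86_CR4_OSFXSR,
       and XORing with X86_CR4_OSFXSR yields zero, so the 'ne' exit is not taken.
       Any of EM=1, TS=1 or OSFXSR=0 leaves a non-zero result and takes the RaiseSseRelated exit. */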
2783 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2784 1+6+3+3+7+7+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2785 //pCodeBuf[off++] = 0xcc;
2786 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2787 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2788 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2789 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2790 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2791 off = iemNativeEmitTbExitJccEx<kIemNativeLabelType_RaiseSseRelated>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
2792
2793#elif defined(RT_ARCH_ARM64)
2794 /*
2795 * We do a modified test here:
2796 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2797 * else { goto RaiseSseRelated; }
2798 */
2799 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2800 1+5 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2801 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2802 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2803 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2804 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2805 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2806 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2807 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2808 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2809 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_RaiseSseRelated>(pReNative, pCodeBuf, off,
2810 idxTmpReg, false /*f64Bit*/);
2811
2812#else
2813# error "Port me!"
2814#endif
2815
2816 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2817 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2818 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2819 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2820
2821#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2822 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2823 }
2824 else
2825 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2826#endif
2827
2828 return off;
2829}
2830
2831
2832#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2833 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2834
2835/**
2836 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2837 *
2838 * @returns New code buffer offset, UINT32_MAX on failure.
2839 * @param pReNative The native recompile state.
2840 * @param off The code buffer offset.
2841 * @param idxInstr The current instruction.
2842 */
2843DECL_INLINE_THROW(uint32_t)
2844iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2845{
2846#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2847 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2848
2849 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2850 {
2851#endif
2852 /*
2853 * Make sure we don't have any outstanding guest register writes as we may
2854 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2855 */
2856 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2857 off = iemNativeRegFlushPendingWrites(pReNative, off);
2858
2859#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2860 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2861#else
2862 RT_NOREF(idxInstr);
2863#endif
2864
2865 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2866 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2867 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2868 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2869 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2870
2871 /*
2872 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2873 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2874 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2875 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2876 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2877 * { likely }
2878 * else { goto RaiseAvxRelated; }
2879 */
2880#ifdef RT_ARCH_AMD64
2881 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2882 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
2883 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2884 ^ 0x1a) ) { likely }
2885 else { goto RaiseAvxRelated; } */
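    /* Illustrative note: the 0x1a constant is simply the expected "all good" bit pattern,
       ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2 = ((4 | 2) << 2) | 2 = 0x1a, i.e. after the two
       rotate-through-carry steps bit 0 holds CR0.TS (must be 0), bit 1 CR4.OSXSAVE (must be 1),
       and bits 3 and 4 hold XCR0.SSE and XCR0.YMM (both must be 1). XORing with it therefore
       yields zero exactly in the no-exception case. */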
2886 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2887 1+6+3+5+3+5+3+7+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2888 //pCodeBuf[off++] = 0xcc;
2889 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2890 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2891 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2892 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2893 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2894 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2895 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2896 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2897 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2898 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2899 off = iemNativeEmitTbExitJccEx<kIemNativeLabelType_RaiseAvxRelated>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
2900
2901#elif defined(RT_ARCH_ARM64)
2902 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
2903 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2904 else { goto RaiseAvxRelated; } */
2905 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2906 1+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2907 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2908 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2909 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2910 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2911 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2912 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2913 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2914 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2915 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2916 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2917 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2918 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_RaiseAvxRelated>(pReNative, pCodeBuf, off,
2919 idxTmpReg, false /*f64Bit*/);
2920
2921#else
2922# error "Port me!"
2923#endif
2924
2925 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2926 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2927 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2928 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2929#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2930 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2931 }
2932 else
2933 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2934#endif
2935
2936 return off;
2937}
2938
2939
2940#define IEM_MC_RAISE_DIVIDE_ERROR_IF_LOCAL_IS_ZERO(a_uVar) \
2941 off = iemNativeEmitRaiseDivideErrorIfLocalIsZero(pReNative, off, a_uVar, pCallEntry->idxInstr)
2942
2943/**
2944 * Emits code to raise a \#DE if a local variable is zero.
2945 *
2946 * @returns New code buffer offset, UINT32_MAX on failure.
2947 * @param pReNative The native recompile state.
2948 * @param off The code buffer offset.
2949 * @param idxVar The variable to check. This must be 32-bit.
2950 * @param idxInstr The current instruction.
2951 */
2952DECL_INLINE_THROW(uint32_t)
2953iemNativeEmitRaiseDivideErrorIfLocalIsZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxInstr)
2954{
2955 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2956 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, sizeof(uint32_t));
2957
2958 /* Make sure we don't have any outstanding guest register writes as we may raise a #DE exception. */
2959 off = iemNativeRegFlushPendingWrites(pReNative, off);
2960
2961 /* Set the instruction number if we're counting. */
2962#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2963 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2964#else
2965 RT_NOREF(idxInstr);
2966#endif
2967
2968 /* Do the job we're here for. */
2969 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off);
2970 off = iemNativeEmitTbExitIfGprIsZero<kIemNativeLabelType_RaiseDe>(pReNative, off, idxVarReg, false /*f64Bit*/);
2971 iemNativeVarRegisterRelease(pReNative, idxVar);
2972
2973 return off;
2974}
2975
2976
2977#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2978 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2979
2980/**
2981 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2982 *
2983 * @returns New code buffer offset, UINT32_MAX on failure.
2984 * @param pReNative The native recompile state.
2985 * @param off The code buffer offset.
2986 * @param idxInstr The current instruction.
2987 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2988 * @param cbAlign The alignment in bytes to check against.
2989 */
2990DECL_INLINE_THROW(uint32_t)
2991iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2992 uint8_t idxVarEffAddr, uint8_t cbAlign)
2993{
2994 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2995 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2996
2997 /*
2998 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2999 */
3000 off = iemNativeRegFlushPendingWrites(pReNative, off);
3001
3002#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3003 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
3004#else
3005 RT_NOREF(idxInstr);
3006#endif
3007
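    /* Note: the test below relies on cbAlign being a power of two, so that
       (EffAddr & (cbAlign - 1)) is non-zero exactly when the address is misaligned. */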
3008 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
3009 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseGp0>(pReNative, off, idxVarReg, cbAlign - 1);
3010 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
3011
3012 return off;
3013}
3014
3015
3016/*********************************************************************************************************************************
3017* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
3018*********************************************************************************************************************************/
3019
3020/**
3021 * Pushes an IEM_MC_IF_XXX onto the condition stack.
3022 *
3023 * @returns Pointer to the condition stack entry on success, NULL on failure
3024 * (too many nestings)
3025 */
3026DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
3027{
3028 uint32_t const idxStack = pReNative->cCondDepth;
3029 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
3030
3031 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
3032 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
3033
3034 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
3035 pEntry->fInElse = false;
3036 pEntry->fIfExitTb = false;
3037 pEntry->fElseExitTb = false;
3038 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
3039 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
3040
3041 return pEntry;
3042}
3043
3044
3045/**
3046 * Start of the if-block, snapshotting the register and variable state.
3047 */
3048DECL_INLINE_THROW(void)
3049iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
3050{
3051 Assert(offIfBlock != UINT32_MAX);
3052 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3053 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3054 Assert(!pEntry->fInElse);
3055
3056 /* Define the start of the IF block if requested or for disassembly purposes. */
3057 if (idxLabelIf != UINT32_MAX)
3058 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
3059#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3060 else
3061 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
3062#else
3063 RT_NOREF(offIfBlock);
3064#endif
3065
3066 /* Copy the initial state so we can restore it in the 'else' block. */
3067 pEntry->InitialState = pReNative->Core;
3068}
3069
3070
3071#define IEM_MC_ELSE() } while (0); \
3072 off = iemNativeEmitElse(pReNative, off); \
3073 do {
3074
3075/** Emits code related to IEM_MC_ELSE. */
3076DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3077{
3078 /* Check sanity and get the conditional stack entry. */
3079 Assert(off != UINT32_MAX);
3080 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3081 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3082 Assert(!pEntry->fInElse);
3083
3084 /* We can skip the dirty register flushing and the jump to the endif label if
3085 the branch already jumped to a TB exit. */
3086 if (!pEntry->fIfExitTb)
3087 {
3088#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) && 0
3089 /* Writeback any dirty shadow registers. */
3090 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
3091 * in one of the branches and leave guest registers already dirty before the start of the if
3092 * block alone. */
3093 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3094#endif
3095
3096 /* Jump to the endif. */
3097 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
3098 }
3099# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3100 else
3101 Assert(pReNative->Core.offPc == 0);
3102# endif
3103
3104 /* Define the else label and enter the else part of the condition. */
3105 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3106 pEntry->fInElse = true;
3107
3108 /* Snapshot the core state so we can do a merge at the endif and restore
3109 the snapshot we took at the start of the if-block. */
3110 pEntry->IfFinalState = pReNative->Core;
3111 pReNative->Core = pEntry->InitialState;
3112
3113 return off;
3114}
3115
3116
3117#define IEM_MC_ENDIF() } while (0); \
3118 off = iemNativeEmitEndIf(pReNative, off)
3119
3120/** Emits code related to IEM_MC_ENDIF. */
3121DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3122{
3123 /* Check sanity and get the conditional stack entry. */
3124 Assert(off != UINT32_MAX);
3125 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3126 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3127
3128#if defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) && 0
3129 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3130#endif
3131
3132 /*
3133 * If either of the branches exited the TB, we can take the state from the
3134 * other branch and skip all the merging headache.
3135 */
3136 bool fDefinedLabels = false;
3137 if (pEntry->fElseExitTb || pEntry->fIfExitTb)
3138 {
3139#ifdef VBOX_STRICT
3140 Assert(pReNative->cCondDepth == 1); /* Assuming this only happens in simple conditional structures. */
3141 Assert(pEntry->fElseExitTb != pEntry->fIfExitTb); /* Assuming we don't have any code where both branches exit. */
3142 PCIEMNATIVECORESTATE const pExitCoreState = pEntry->fIfExitTb && pEntry->fInElse
3143 ? &pEntry->IfFinalState : &pReNative->Core;
3144# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3145 Assert(pExitCoreState->bmGstRegShadowDirty == 0);
3146# endif
3147# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3148 Assert(pExitCoreState->offPc == 0);
3149# endif
3150 RT_NOREF(pExitCoreState);
3151#endif
3152
3153 if (!pEntry->fIfExitTb)
3154 {
3155 Assert(pEntry->fInElse);
3156 pReNative->Core = pEntry->IfFinalState;
3157 }
3158 }
3159 else
3160 {
3161 /*
3162 * Now we have to find common ground with the core state at the end of the
3163 * if-block (the if-final state). Use the smallest common denominator and
3164 * just drop anything that isn't the same in both states.
3165 */
3166 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
3167 * which is why we're doing this at the end of the else-block.
3168 * But we'd need more info about the future for that to be worth the effort. */
3169 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
3170#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3171 AssertMsgStmt(pReNative->Core.offPc == pOther->offPc,
3172 ("Core.offPc=%#RX64 pOther->offPc=%#RX64\n", pReNative->Core.offPc, pOther->offPc),
3173 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3174#endif
3175
3176 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
3177 {
3178#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3179 /*
3180 * If the branches differ in dirty shadow registers, we flush the registers
3181 * that are only dirty in the current branch here and emit code further down
3182 * to flush those that are only dirty in the other one.
3183 */
3184 uint64_t const fGstRegDirtyOther = pOther->bmGstRegShadowDirty;
3185 uint64_t const fGstRegDirtyThis = pReNative->Core.bmGstRegShadowDirty;
3186 uint64_t const fGstRegDirtyDiff = fGstRegDirtyOther ^ fGstRegDirtyThis;
3187 uint64_t const fGstRegDirtyHead = fGstRegDirtyThis & fGstRegDirtyDiff;
3188 uint64_t fGstRegDirtyTail = fGstRegDirtyOther & fGstRegDirtyDiff;
3189 if (!fGstRegDirtyDiff)
3190 { /* likely */ }
3191 else
3192 {
3193 //uint64_t const fGstRegDirtyHead = pReNative->Core.bmGstRegShadowDirty & fGstRegDirtyDiff;
3194 if (fGstRegDirtyHead)
3195 {
3196 Log12(("iemNativeEmitEndIf: flushing dirty guest registers in current branch: %RX64\n", fGstRegDirtyHead));
3197 off = iemNativeRegFlushDirtyGuest(pReNative, off, fGstRegDirtyHead);
3198 }
3199 }
3200#endif
3201
3202 /*
3203 * Shadowed guest registers.
3204 *
3205 * We drop any shadows where the two states disagree about where
3206 * things are kept. We may end up flushing more dirty registers
3207 * here, if the two branches keep things in different registers.
3208 */
3209 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
3210 if (fGstRegs)
3211 {
3212 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
3213 do
3214 {
3215 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3216 fGstRegs &= ~RT_BIT_64(idxGstReg);
3217
3218 uint8_t const idxCurHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3219 uint8_t const idxOtherHstReg = pOther->aidxGstRegShadows[idxGstReg];
3220 if ( idxCurHstReg != idxOtherHstReg
3221 || !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg)))
3222 {
3223#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3224 Log12(("iemNativeEmitEndIf: dropping gst %s (%d) from hst %s (other %d/%#RX64)\n",
3225 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3226 idxOtherHstReg, pOther->bmGstRegShadows));
3227#else
3228 Log12(("iemNativeEmitEndIf: dropping %s gst %s (%d) from hst %s (other %d/%#RX64/%s)\n",
3229 pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "_dirty_" : "clean",
3230 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3231 idxOtherHstReg, pOther->bmGstRegShadows,
3232 pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "dirty" : "clean"));
3233 if (pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3234 fGstRegDirtyTail |= RT_BIT_64(idxGstReg);
3235 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3236 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3237#endif
3238 iemNativeRegClearGstRegShadowingOne(pReNative, idxCurHstReg, (IEMNATIVEGSTREG)idxGstReg, off);
3239 }
3240 } while (fGstRegs);
3241 }
3242 else
3243 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
3244
3245#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3246 /*
3247 * Generate jumpy code for flushing dirty registers from the other
3248 * branch that aren't dirty in the current one.
3249 */
3250 if (!fGstRegDirtyTail)
3251 { /* likely */ }
3252 else
3253 {
3254 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEndIfOtherBranchDirty);
3255 Log12(("iemNativeEmitEndIf: Dirty register only in the other branch: %#RX64 - BAD!\n", fGstRegDirtyTail));
3256
3257 /* First the current branch has to jump over the dirty flushing from the other branch. */
3258 uint32_t const offFixup1 = off;
3259 off = iemNativeEmitJmpToFixed(pReNative, off, off + 10);
3260
3261 /* Put the endif and maybe else label here so the other branch ends up here. */
3262 if (!pEntry->fInElse)
3263 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3264 else
3265 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3266 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3267 fDefinedLabels = true;
3268
3269 /* Flush the dirty guest registers from the other branch. */
3270 while (fGstRegDirtyTail)
3271 {
3272 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegDirtyTail) - 1;
3273 fGstRegDirtyTail &= ~RT_BIT_64(idxGstReg);
3274 Log12(("iemNativeEmitEndIf: tail flushing %s (%d) from other branch %d (cur %d/%#RX64)\n",
3275 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, pOther->aidxGstRegShadows[idxGstReg],
3276 pReNative->Core.aidxGstRegShadows[idxGstReg], pReNative->Core.bmGstRegShadows));
3277
3278 off = iemNativeRegFlushPendingWriteEx(pReNative, off, (PIEMNATIVECORESTATE)pOther, (IEMNATIVEGSTREG)idxGstReg);
3279
3280 /* Mismatching shadowing should've been dropped in the previous step already. */
3281 Assert( !(pReNative->Core.bmGstRegShadows & RT_BIT_64(idxGstReg))
3282 || pReNative->Core.aidxGstRegShadows[idxGstReg] == pOther->aidxGstRegShadows[idxGstReg]);
3283 }
3284
3285 /* Here is the actual endif label, fixup the above jump to land here. */
3286 iemNativeFixupFixedJump(pReNative, offFixup1, off);
3287 }
3288#endif
3289
3290 /*
3291 * Check variables next. For now we must require them to be identical
3292 * or stuff we can recreate. (No code is emitted here.)
3293 */
3294 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
3295#ifdef VBOX_STRICT
3296 uint32_t const offAssert = off;
3297#endif
3298 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
3299 if (fVars)
3300 {
3301 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
3302 do
3303 {
3304 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
3305 fVars &= ~RT_BIT_32(idxVar);
3306
3307 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
3308 {
3309 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
3310 continue;
3311 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3312 {
3313 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3314 if (idxHstReg != UINT8_MAX)
3315 {
3316 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3317 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3318 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
3319 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3320 }
3321 continue;
3322 }
3323 }
3324 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
3325 continue;
3326
3327 /* Irreconcilable, so drop it. */
3328 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3329 if (idxHstReg != UINT8_MAX)
3330 {
3331 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3332 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3333 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
3334 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3335 }
3336 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3337 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
3338 } while (fVars);
3339 }
3340 Assert(off == offAssert);
3341
3342 /*
3343 * Finally, check that the host register allocations matches.
3344 */
3345 AssertMsgStmt((pReNative->Core.bmHstRegs & (pReNative->Core.bmHstRegs ^ pOther->bmHstRegs)) == 0,
3346 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
3347 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
3348 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3349 }
3350 }
3351
3352 /*
3353 * Define the endif label and maybe the else one if we're still in the 'if' part.
3354 */
3355 if (!fDefinedLabels)
3356 {
3357 if (!pEntry->fInElse)
3358 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3359 else
3360 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3361 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3362 }
3363
3364 /* Pop the conditional stack. */
3365 pReNative->cCondDepth -= 1;
3366
3367 return off;
3368}
3369
3370
3371/**
3372 * Helper function to convert X86_EFL_xxx masks to liveness masks.
3373 *
3374 * The compiler should be able to figure this out at compile time, so sprinkling
3375 * constexpr wherever possible here to nudge it along.
3376 */
3377template<uint32_t const a_fEfl>
3378RT_CONSTEXPR uint64_t iemNativeEflagsToLivenessMask(void)
3379{
3380 return (a_fEfl & ~X86_EFL_STATUS_BITS ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER) : 0)
3381 | (a_fEfl & X86_EFL_CF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF) : 0)
3382 | (a_fEfl & X86_EFL_PF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_PF) : 0)
3383 | (a_fEfl & X86_EFL_AF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_AF) : 0)
3384 | (a_fEfl & X86_EFL_ZF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_ZF) : 0)
3385 | (a_fEfl & X86_EFL_SF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_SF) : 0)
3386 | (a_fEfl & X86_EFL_OF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OF) : 0);
3387}
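/* Usage sketch (illustrative): iemNativeEflagsToLivenessMask<X86_EFL_CF | X86_EFL_ZF>()
 * evaluates at compile time to RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF) | RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_ZF),
 * which is the liveness mask form the IEM_MC_IF_EFL_XXX emitters below pass on to the
 * EFLAGS register allocator. */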
3388
3389
3390/**
3391 * Helper function to convert a single X86_EFL_xxxx value to bit number.
3392 *
3393 * The compiler should be able to figure this out at compile time, so sprinkling
3394 * constexpr wherever possible here to nudge it along.
3395 */
3396template<uint32_t const a_fEfl>
3397RT_CONSTEXPR unsigned iemNativeEflagsToSingleBitNo(void)
3398{
3399 AssertCompile( a_fEfl == X86_EFL_CF
3400 || a_fEfl == X86_EFL_PF
3401 || a_fEfl == X86_EFL_AF
3402 || a_fEfl == X86_EFL_ZF
3403 || a_fEfl == X86_EFL_SF
3404 || a_fEfl == X86_EFL_OF
3405 || a_fEfl == X86_EFL_DF);
3406 return a_fEfl == X86_EFL_CF ? X86_EFL_CF_BIT
3407 : a_fEfl == X86_EFL_PF ? X86_EFL_PF_BIT
3408 : a_fEfl == X86_EFL_AF ? X86_EFL_AF_BIT
3409 : a_fEfl == X86_EFL_ZF ? X86_EFL_ZF_BIT
3410 : a_fEfl == X86_EFL_SF ? X86_EFL_SF_BIT
3411 : a_fEfl == X86_EFL_OF ? X86_EFL_OF_BIT
3412 : X86_EFL_DF_BIT;
3413}
3414
3415
3416#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
3417 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits), iemNativeEflagsToLivenessMask<a_fBits>()); \
3418 do {
3419
3420/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
3421DECL_INLINE_THROW(uint32_t)
3422iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl, uint64_t fLivenessEflBits)
3423{
3424 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fBitsInEfl);
3425 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3426 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3427
3428 /* Get the eflags. */
3429 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ReadOnly, fLivenessEflBits);
3430
3431 /* Test and jump. */
3432 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3433
3434 /* Free but don't flush the EFlags register. */
3435 iemNativeRegFreeTmp(pReNative, idxEflReg);
3436
3437 /* Make a copy of the core state now as we start the if-block. */
3438 iemNativeCondStartIfBlock(pReNative, off);
3439
3440 return off;
3441}
3442
3443
3444#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
3445 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits), iemNativeEflagsToLivenessMask<a_fBits>()); \
3446 do {
3447
3448/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
3449DECL_INLINE_THROW(uint32_t)
3450iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl, uint64_t fLivenessEflBits)
3451{
3452 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fBitsInEfl);
3453 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3454 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3455
3456 /* Get the eflags. */
3457 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ReadOnly, fLivenessEflBits);
3458
3459 /* Test and jump. */
3460 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3461
3462 /* Free but don't flush the EFlags register. */
3463 iemNativeRegFreeTmp(pReNative, idxEflReg);
3464
3465 /* Make a copy of the core state now as we start the if-block. */
3466 iemNativeCondStartIfBlock(pReNative, off);
3467
3468 return off;
3469}
3470
3471
3472#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
3473 off = iemNativeEmitIfEflagsBitSet(pReNative, off, iemNativeEflagsToSingleBitNo<a_fBit>(), \
3474 iemNativeEflagsToLivenessMask<a_fBit>()); \
3475 do {
3476
3477/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
3478DECL_INLINE_THROW(uint32_t)
3479iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, unsigned iBitNo, uint64_t fLivenessEflBit)
3480{
3481 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3482 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3483 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3484
3485 /* Get the eflags. */
3486 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ReadOnly, fLivenessEflBit);
3487
3488 /* Test and jump. */
3489 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3490
3491 /* Free but don't flush the EFlags register. */
3492 iemNativeRegFreeTmp(pReNative, idxEflReg);
3493
3494 /* Make a copy of the core state now as we start the if-block. */
3495 iemNativeCondStartIfBlock(pReNative, off);
3496
3497 return off;
3498}
3499
3500
3501#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
3502 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, iemNativeEflagsToSingleBitNo<a_fBit>(), \
3503 iemNativeEflagsToLivenessMask<a_fBit>()); \
3504 do {
3505
3506/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
3507DECL_INLINE_THROW(uint32_t)
3508iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, unsigned iBitNo, uint64_t fLivenessEflBit)
3509{
3510 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3511 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3512 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3513
3514 /* Get the eflags. */
3515 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ReadOnly, fLivenessEflBit);
3516
3517 /* Test and jump. */
3518 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3519
3520 /* Free but don't flush the EFlags register. */
3521 iemNativeRegFreeTmp(pReNative, idxEflReg);
3522
3523 /* Make a copy of the core state now as we start the if-block. */
3524 iemNativeCondStartIfBlock(pReNative, off);
3525
3526 return off;
3527}
3528
3529
3530#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
3531 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, false /*fInverted*/, \
3532 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3533 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3534 iemNativeEflagsToLivenessMask<a_fBit1 | a_fBit2>()); \
3535 do {
3536
3537#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
3538 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, true /*fInverted*/, \
3539 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3540 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3541 iemNativeEflagsToLivenessMask<a_fBit1 | a_fBit2>()); \
3542 do {
3543
3544/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
3545DECL_INLINE_THROW(uint32_t)
3546iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3547 bool fInverted, unsigned iBitNo1, unsigned iBitNo2, uint64_t fLivenessEflBits)
3548{
3549 Assert(iBitNo1 != iBitNo2);
3550 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3551 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3552 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3553
3554 /* Get the eflags. */
3555 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ReadOnly, fLivenessEflBits);
3556
3557#ifdef RT_ARCH_AMD64
3558 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, RT_BIT_64(iBitNo1));
3559
3560 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3561 if (iBitNo1 > iBitNo2)
3562 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3563 else
3564 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3565 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3566
3567#elif defined(RT_ARCH_ARM64)
3568 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3569 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3570
3571 /* and tmpreg, eflreg, #1<<iBitNo1 */
3572 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3573
3574 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3575 if (iBitNo1 > iBitNo2)
3576 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3577 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3578 else
3579 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3580 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3581
3582 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3583
3584#else
3585# error "Port me"
3586#endif
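    /* Worked example (hypothetical flag choice): for IEM_MC_IF_EFL_BITS_EQ(X86_EFL_SF, X86_EFL_OF)
       the sequence above isolates SF, shifts it onto the OF bit position and XORs with EFLAGS, so
       bit iBitNo2 of tmpreg ends up as SF ^ OF, i.e. set exactly when the two flags differ, which
       is what the test below branches on. */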
3587
3588 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3589 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3590 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3591
3592 /* Free but don't flush the EFlags and tmp registers. */
3593 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3594 iemNativeRegFreeTmp(pReNative, idxEflReg);
3595
3596 /* Make a copy of the core state now as we start the if-block. */
3597 iemNativeCondStartIfBlock(pReNative, off);
3598
3599 return off;
3600}
3601
3602
3603#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
3604 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, false /*fInverted*/, \
3605 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3606 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3607 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3608 iemNativeEflagsToLivenessMask<a_fBit | a_fBit1 | a_fBit2>()); \
3609 do {
3610
3611#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
3612 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, true /*fInverted*/, \
3613 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3614 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3615 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3616 iemNativeEflagsToLivenessMask<a_fBit | a_fBit1 | a_fBit2>()); \
3617 do {
3618
3619/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
3620 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
3621DECL_INLINE_THROW(uint32_t)
3622iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fInverted,
3623 unsigned iBitNo, unsigned iBitNo1, unsigned iBitNo2, uint64_t fLivenessEflBits)
3624{
3625 Assert(iBitNo1 != iBitNo);
3626 Assert(iBitNo2 != iBitNo);
3627 Assert(iBitNo2 != iBitNo1);
3628 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo) | RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3629 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo) | RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3630 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3631
3632 /* We need an extra if-block label for the inverted variant. */
3633 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
3634 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
3635
3636 /* Get the eflags. */
3637 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ReadOnly, fLivenessEflBits);
3638
3639#ifdef RT_ARCH_AMD64
3640 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, RT_BIT_64(iBitNo1)); /* This must come before we jump anywhere! */
3641#elif defined(RT_ARCH_ARM64)
3642 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3643#endif
3644
3645 /* Check for the lone bit first. */
3646 if (!fInverted)
3647 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3648 else
3649 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
3650
3651 /* Then extract and compare the other two bits. */
3652#ifdef RT_ARCH_AMD64
3653 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3654 if (iBitNo1 > iBitNo2)
3655 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3656 else
3657 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3658 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3659
3660#elif defined(RT_ARCH_ARM64)
3661 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3662
3663 /* and tmpreg, eflreg, #1<<iBitNo1 */
3664 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3665
3666 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3667 if (iBitNo1 > iBitNo2)
3668 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3669 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3670 else
3671 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3672 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3673
3674 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3675
3676#else
3677# error "Port me"
3678#endif
3679
3680 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3681 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3682 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3683
3684 /* Free but don't flush the EFlags and tmp registers. */
3685 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3686 iemNativeRegFreeTmp(pReNative, idxEflReg);
3687
3688 /* Make a copy of the core state now as we start the if-block. */
3689 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
3690
3691 return off;
3692}
3693
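/* Illustration only (helper name and flag choice are made up): the combined
   condition checked above for the non-inverted IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ
   case, e.g. a JG/JNLE style "ZF clear and SF == OF" test. */
#if 0
static bool iemNativeSketchEflBitClearAndBitsEqual(uint32_t fEfl, unsigned iBitNo, unsigned iBitNo1, unsigned iBitNo2)
{
    return !(fEfl & RT_BIT_32(iBitNo))
        && RT_BOOL(fEfl & RT_BIT_32(iBitNo1)) == RT_BOOL(fEfl & RT_BIT_32(iBitNo2));
}
#endif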
3694
3695#define IEM_MC_IF_CX_IS_NZ() \
3696 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
3697 do {
3698
3699/** Emits code for IEM_MC_IF_CX_IS_NZ. */
3700DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3701{
3702 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3703
3704 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3705 kIemNativeGstRegUse_ReadOnly);
3706 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
3707 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3708
3709 iemNativeCondStartIfBlock(pReNative, off);
3710 return off;
3711}
3712
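/* Usage sketch (shape only, not lifted from any particular decoder function):
   these conditionals back the usual if/else/endif triple in the MC blocks,
       IEM_MC_IF_CX_IS_NZ() {
           ...
       } IEM_MC_ELSE() {
           ...
       } IEM_MC_ENDIF();
   where the trailing "do {" opened by the macro above is expected to be
   balanced again by the corresponding IEM_MC_ELSE/IEM_MC_ENDIF emitters. */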
3713
3714#define IEM_MC_IF_ECX_IS_NZ() \
3715 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
3716 do {
3717
3718#define IEM_MC_IF_RCX_IS_NZ() \
3719 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
3720 do {
3721
3722/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
3723DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3724{
3725 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3726
3727 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3728 kIemNativeGstRegUse_ReadOnly);
3729 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
3730 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3731
3732 iemNativeCondStartIfBlock(pReNative, off);
3733 return off;
3734}
3735
3736
3737#define IEM_MC_IF_CX_IS_NOT_ONE() \
3738 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
3739 do {
3740
3741/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
3742DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3743{
3744 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3745
3746 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3747 kIemNativeGstRegUse_ReadOnly);
3748#ifdef RT_ARCH_AMD64
3749 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3750#else
3751 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3752 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3753 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3754#endif
3755 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3756
3757 iemNativeCondStartIfBlock(pReNative, off);
3758 return off;
3759}
3760
3761
3762#define IEM_MC_IF_ECX_IS_NOT_ONE() \
3763 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
3764 do {
3765
3766#define IEM_MC_IF_RCX_IS_NOT_ONE() \
3767 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
3768 do {
3769
3770/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
3771DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3772{
3773 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3774
3775 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3776 kIemNativeGstRegUse_ReadOnly);
3777 if (f64Bit)
3778 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3779 else
3780 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3781 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3782
3783 iemNativeCondStartIfBlock(pReNative, off);
3784 return off;
3785}
3786
3787
3788#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3789 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, \
3790 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3791 iemNativeEflagsToLivenessMask<a_fBit>()); \
3792 do {
3793
3794#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3795 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, \
3796 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3797 iemNativeEflagsToLivenessMask<a_fBit>()); \
3798 do {
3799
3800/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3801 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3802DECL_INLINE_THROW(uint32_t)
3803iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3804 bool fCheckIfSet, unsigned iBitNo, uint64_t fLivenessEflBit)
3805{
3806 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3807 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3808 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3809
3810 /* We have to load both RCX and EFLAGS before we can start branching,
3811 otherwise we'll end up in the else-block with an inconsistent
3812 register allocator state.
3813 Doing EFLAGS first as it's more likely to be loaded, right? */
3814 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ReadOnly, fLivenessEflBit);
3815 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3816 kIemNativeGstRegUse_ReadOnly);
3817
3818 /** @todo we could reduce this to a single branch instruction by spending a
3819 * temporary register and some setnz stuff. Not sure if loops are
3820 * worth it. */
3821 /* Check CX. */
3822#ifdef RT_ARCH_AMD64
3823 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3824#else
3825 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3826 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3827 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3828#endif
3829
3830 /* Check the EFlags bit. */
3831 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3832 !fCheckIfSet /*fJmpIfSet*/);
3833
3834 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3835 iemNativeRegFreeTmp(pReNative, idxEflReg);
3836
3837 iemNativeCondStartIfBlock(pReNative, off);
3838 return off;
3839}
3840
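/* Illustration only (the helper name is made up): the combined condition for
   the fCheckIfSet case, i.e. enter the if-block when CX isn't one and the
   given EFLAGS bit is set (the LOOPE style test). */
#if 0
static bool iemNativeSketchCxNotOneAndEflBitSet(uint64_t uRcx, uint32_t fEfl, unsigned iBitNo)
{
    return (uint16_t)uRcx != 1
        && (fEfl & RT_BIT_32(iBitNo)) != 0;
}
#endif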
3841
3842#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3843 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, false /*f64Bit*/, \
3844 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3845 iemNativeEflagsToLivenessMask<a_fBit>()); \
3846 do {
3847
3848#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3849 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, false /*f64Bit*/, \
3850 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3851 iemNativeEflagsToLivenessMask<a_fBit>()); \
3852 do {
3853
3854#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3855 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, true /*f64Bit*/, \
3856 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3857 iemNativeEflagsToLivenessMask<a_fBit>()); \
3858 do {
3859
3860#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3861 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, true /*f64Bit*/, \
3862 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3863 iemNativeEflagsToLivenessMask<a_fBit>()); \
3864 do {
3865
3866/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3867 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3868 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3869 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3870DECL_INLINE_THROW(uint32_t)
3871iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fCheckIfSet, bool f64Bit,
3872 unsigned iBitNo, uint64_t fLivenessEFlBit)
3873
3874{
3875 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3876 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3877 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3878
3879 /* We have to load both RCX and EFLAGS before we can start branching,
3880 otherwise we'll end up in the else-block with an inconsistent
3881 register allocator state.
3882 Doing EFLAGS first as it's more likely to be loaded, right? */
3883 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ReadOnly, fLivenessEFlBit);
3884 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3885 kIemNativeGstRegUse_ReadOnly);
3886
3887 /** @todo we could reduce this to a single branch instruction by spending a
3888 * temporary register and some setnz stuff. Not sure if loops are
3889 * worth it. */
3890 /* Check RCX/ECX. */
3891 if (f64Bit)
3892 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3893 else
3894 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3895
3896 /* Check the EFlags bit. */
3897 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3898 !fCheckIfSet /*fJmpIfSet*/);
3899
3900 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3901 iemNativeRegFreeTmp(pReNative, idxEflReg);
3902
3903 iemNativeCondStartIfBlock(pReNative, off);
3904 return off;
3905}
3906
3907
3908#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3909 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3910 do {
3911
3912/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3913DECL_INLINE_THROW(uint32_t)
3914iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3915{
3916 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3917
3918 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3919 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3920 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3921 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3922
3923 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3924
3925 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3926
3927 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3928
3929 iemNativeCondStartIfBlock(pReNative, off);
3930 return off;
3931}
3932
3933
3934#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3935 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3936 do {
3937
3938/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3939DECL_INLINE_THROW(uint32_t)
3940iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3941{
3942 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3943 Assert(iGReg < 16);
3944
3945 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3946 kIemNativeGstRegUse_ReadOnly);
3947
3948 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3949
3950 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3951
3952 iemNativeCondStartIfBlock(pReNative, off);
3953 return off;
3954}
3955
3956
3957
3958/*********************************************************************************************************************************
3959* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3960*********************************************************************************************************************************/
3961
3962#define IEM_MC_NOREF(a_Name) \
3963 RT_NOREF_PV(a_Name)
3964
3965#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3966 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3967
3968#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3969 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3970
3971#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3972 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3973
3974#define IEM_MC_LOCAL(a_Type, a_Name) \
3975 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3976
3977#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3978 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3979
3980#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3981 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3982
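/* Usage sketch (all names here are illustrative only): these variable macros
   are typically combined with the call emitters further down, along the lines
   of
       IEM_MC_ARG(uint16_t *, pu16Dst, 0);
       IEM_MC_ARG(uint16_t,   u16Src,  1);
       IEM_MC_ARG(uint32_t *, pEFlags, 2);
       ...
       IEM_MC_CALL_VOID_AIMPL_3(pfnWorkerU16, pu16Dst, u16Src, pEFlags);
   Each IEM_MC_ARG/IEM_MC_LOCAL statement only allocates a recompiler variable
   index here; the call emitters take care of marshalling the variables into
   host argument registers. */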
3983
3984/**
3985 * Sets the host register for @a idxVarRc to @a idxReg.
3986 *
3987 * Any guest register shadowing will be implicitly dropped by this call.
3988 *
3989 * The variable must not have any register associated with it (causes
3990 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3991 * implied.
3992 *
3993 * @returns idxReg
3994 * @param pReNative The recompiler state.
3995 * @param idxVar The variable.
3996 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3997 * @param off For recording in debug info.
3998 * @param fAllocated Set if the register is already allocated, false if not.
3999 *
4000 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
4001 */
4002DECL_INLINE_THROW(uint8_t)
4003iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off, bool fAllocated)
4004{
4005 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4006 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4007 Assert(!pVar->fRegAcquired);
4008 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4009 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
4010 AssertStmt(RT_BOOL(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)) == fAllocated,
4011 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
4012
4013 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
4014 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
4015
4016 iemNativeVarSetKindToStack(pReNative, idxVar);
4017 pVar->idxReg = idxReg;
4018
4019 return idxReg;
4020}
4021
4022
4023/**
4024 * A convenience wrapper around iemNativeVarRegisterSet() that also marks the register as acquired.
4025 */
4026DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
4027 uint8_t idxReg, uint32_t *poff)
4028{
4029 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff, false /*fAllocated*/);
4030 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
4031 return idxReg;
4032}
4033
4034
4035/**
4036 * This is called by IEM_MC_END() to clean up all variables.
4037 */
4038DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
4039{
4040 uint32_t const bmVars = pReNative->Core.bmVars;
4041 if (bmVars != 0)
4042 iemNativeVarFreeAllSlow(pReNative, bmVars);
4043 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
4044 Assert(pReNative->Core.bmStack == 0);
4045}
4046
4047
4048#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
4049
4050/**
4051 * This is called by IEM_MC_FREE_LOCAL.
4052 */
4053DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4054{
4055 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4056 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
4057 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
4058}
4059
4060
4061#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
4062
4063/**
4064 * This is called by IEM_MC_FREE_ARG.
4065 */
4066DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4067{
4068 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4069 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
4070 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
4071}
4072
4073
4074#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
4075
4076/**
4077 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
4078 */
4079DECL_INLINE_THROW(uint32_t)
4080iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
4081{
4082 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
4083 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
4084 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4085 Assert( pVarDst->cbVar == sizeof(uint16_t)
4086 || pVarDst->cbVar == sizeof(uint32_t));
4087
4088 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
4089 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
4090 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
4091 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
4092 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4093
4094 Assert(pVarDst->cbVar < pVarSrc->cbVar);
4095
4096 /*
4097 * Special case for immediates.
4098 */
4099 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
4100 {
4101 switch (pVarDst->cbVar)
4102 {
4103 case sizeof(uint16_t):
4104 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
4105 break;
4106 case sizeof(uint32_t):
4107 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
4108 break;
4109 default: AssertFailed(); break;
4110 }
4111 }
4112 else
4113 {
4114 /*
4115 * The generic solution for now.
4116 */
4117 /** @todo optimize this by having the python script make sure the source
4118 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
4119 * statement. Then we could just transfer the register assignments. */
4120 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
4121 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
4122 switch (pVarDst->cbVar)
4123 {
4124 case sizeof(uint16_t):
4125 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
4126 break;
4127 case sizeof(uint32_t):
4128 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
4129 break;
4130 default: AssertFailed(); break;
4131 }
4132 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
4133 iemNativeVarRegisterRelease(pReNative, idxVarDst);
4134 }
4135 return off;
4136}
4137
4138
4139
4140/*********************************************************************************************************************************
4141* Emitters for IEM_MC_CALL_CIMPL_XXX *
4142*********************************************************************************************************************************/
4143
4144/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
4145DECL_INLINE_THROW(uint32_t)
4146iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
4147 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
4148
4149{
4150 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
4151 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
4152
4153#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4154 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
4155 when a call clobbers any of the relevant control registers. */
4156# if 1
4157 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
4158 {
4159 /* Likely as long as call+ret are done via cimpl. */
4160 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
4161 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
4162 }
4163 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
4164 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4165 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
4166 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4167 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
4168 else
4169 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4170 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4171 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4172
4173# else
4174 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
4175 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4176 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
4177 pReNative->fSimdRaiseXcptChecksEmitted = 0;
4178 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
4179 || pfnCImpl == (uintptr_t)iemCImpl_callf
4180 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
4181 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
4182 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4183 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4184 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4185# endif
4186
4187# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
4188 /* Mark the host floating point control register as not synced if MXCSR is modified. */
4189 if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_MxCsr))
4190 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
4191# endif
4192#endif
4193
4194 /*
4195 * Do all the call setup and cleanup.
4196 */
4197 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
4198
4199 /*
4200 * Load the two or three hidden arguments.
4201 */
4202#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4203 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4204 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4205 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
4206#else
4207 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4208 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
4209#endif
4210
4211 /*
4212 * Make the call and check the return code.
4213 *
4214 * Shadow PC copies are always flushed here; other stuff depends on flags.
4215 * Segment and general purpose registers are explicitly flushed via the
4216 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
4217 * macros.
4218 */
4219 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
4220#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4221 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4222#endif
4223 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
4224 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
4225 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4226 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4227
4228#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
4229 pReNative->Core.fDebugPcInitialized = false;
4230 Log4(("fDebugPcInitialized=false cimpl off=%#x (v1)\n", off));
4231#endif
4232
4233 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4234}
4235
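/* Rough sketch of what the emitted CIMPL call amounts to at runtime: two hidden
   arguments (pVCpu and cbInstr) precede the explicit ones, and on Windows/AMD64
   with VBOXSTRICTRC_STRICT_ENABLED an extra rcStrict output slot is passed
   first:
       rcStrict = pfnCImpl(pVCpu, cbInstr [, a0, a1, ...]);
   The return code is then checked by iemNativeEmitCheckCallRetAndPassUp(). */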
4236
4237#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4238 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
4239
4240/** Emits code for IEM_MC_CALL_CIMPL_1. */
4241DECL_INLINE_THROW(uint32_t)
4242iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4243 uintptr_t pfnCImpl, uint8_t idxArg0)
4244{
4245 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4246 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
4247}
4248
4249
4250#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4251 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
4252
4253/** Emits code for IEM_MC_CALL_CIMPL_2. */
4254DECL_INLINE_THROW(uint32_t)
4255iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4256 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
4257{
4258 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4259 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4260 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
4261}
4262
4263
4264#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4265 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4266 (uintptr_t)a_pfnCImpl, a0, a1, a2)
4267
4268/** Emits code for IEM_MC_CALL_CIMPL_3. */
4269DECL_INLINE_THROW(uint32_t)
4270iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4271 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4272{
4273 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4274 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4275 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4276 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
4277}
4278
4279
4280#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
4281 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4282 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
4283
4284/** Emits code for IEM_MC_CALL_CIMPL_4. */
4285DECL_INLINE_THROW(uint32_t)
4286iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4287 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4288{
4289 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4290 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4291 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4292 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4293 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
4294}
4295
4296
4297#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
4298 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4299 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
4300
4301/** Emits code for IEM_MC_CALL_CIMPL_5. */
4302DECL_INLINE_THROW(uint32_t)
4303iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4304 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
4305{
4306 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4307 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4308 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4309 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4310 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
4311 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
4312}
4313
4314
4315/** Recompiler debugging: Flush guest register shadow copies. */
4316#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
4317
4318
4319
4320/*********************************************************************************************************************************
4321* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
4322*********************************************************************************************************************************/
4323
4324/**
4325 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
4326 */
4327DECL_INLINE_THROW(uint32_t)
4328iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4329 uintptr_t pfnAImpl, uint8_t cArgs)
4330{
4331 if (idxVarRc != UINT8_MAX)
4332 {
4333 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
4334 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
4335 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
4336 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
4337 }
4338
4339 /*
4340 * Do all the call setup and cleanup.
4341 *
4342 * Only pending guest register writes in call volatile registers need flushing here, as
4343 * assembly helpers can't throw and don't access anything living in CPUMCTX; they only
4344 * access their parameters. Call volatile registers are always flushed by iemNativeEmitCallCommon()
4345 * regardless of the fFlushPendingWrites parameter.
4346 */
4347 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
4348
4349 /*
4350 * Make the call and update the return code variable if we've got one.
4351 */
4352 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnAImpl);
4353 if (idxVarRc != UINT8_MAX)
4354 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off, false /*fAllocated*/);
4355
4356 return off;
4357}
4358
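/* Rough sketch of the AIMPL case for contrast: there are no hidden arguments,
   so the emitted code essentially performs
       [rc =] pfnAImpl(a0, a1, ...);
   with any return value picked up straight from IEMNATIVE_CALL_RET_GREG and
   bound to the result variable by iemNativeVarRegisterSet() above. */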
4359
4360
4361#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
4362 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
4363
4364#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
4365 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
4366
4367/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
4368DECL_INLINE_THROW(uint32_t)
4369iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
4370{
4371 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
4372}
4373
4374
4375#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
4376 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
4377
4378#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
4379 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
4380
4381/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
4382DECL_INLINE_THROW(uint32_t)
4383iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
4384{
4385 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4386 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
4387}
4388
4389
4390#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
4391 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
4392
4393#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
4394 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
4395
4396/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
4397DECL_INLINE_THROW(uint32_t)
4398iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4399 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
4400{
4401 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4402 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4403 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
4404}
4405
4406
4407#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
4408 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
4409
4410#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
4411 IEM_MC_LOCAL(a_rcType, a_rc); \
4412 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
4413
4414/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
4415DECL_INLINE_THROW(uint32_t)
4416iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4417 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4418{
4419 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4420 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4421 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4422 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
4423}
4424
4425
4426#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
4427 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4428
4429#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
4430 IEM_MC_LOCAL(a_rcType, a_rc); \
4431 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4432
4433/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
4434DECL_INLINE_THROW(uint32_t)
4435iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4436 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4437{
4438 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4439 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4440 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4441 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
4442 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
4443}
4444
4445
4446
4447/*********************************************************************************************************************************
4448* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
4449*********************************************************************************************************************************/
4450
4451#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
4452 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
4453
4454#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4455 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
4456
4457#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4458 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
4459
4460#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4461 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
4462
4463
4464/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
4465 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
4466DECL_INLINE_THROW(uint32_t)
4467iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
4468{
4469 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4470 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4471 Assert(iGRegEx < 20);
4472
4473 /* Same discussion as in iemNativeEmitFetchGregU16 */
4474 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4475 kIemNativeGstRegUse_ReadOnly);
4476
4477 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4478 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4479
4480 /* The value is zero-extended to the full 64-bit host register width. */
4481 if (iGRegEx < 16)
4482 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4483 else
4484 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4485
4486 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4487 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4488 return off;
4489}
4490
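/* Inferred from the Assert(iGRegEx < 20) and the Gpr8Hi path above (so treat it
   as an assumption rather than a spec statement): iGRegEx values 0..15 select
   the low byte of the corresponding GPR, while 16..19 select the legacy
   high-byte registers AH/CH/DH/BH of GPRs 0..3. */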
4491
4492#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4493 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
4494
4495#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4496 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
4497
4498#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4499 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
4500
4501/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
4502DECL_INLINE_THROW(uint32_t)
4503iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
4504{
4505 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4506 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4507 Assert(iGRegEx < 20);
4508
4509 /* Same discussion as in iemNativeEmitFetchGregU16 */
4510 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4511 kIemNativeGstRegUse_ReadOnly);
4512
4513 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4514 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4515
4516 if (iGRegEx < 16)
4517 {
4518 switch (cbSignExtended)
4519 {
4520 case sizeof(uint16_t):
4521 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4522 break;
4523 case sizeof(uint32_t):
4524 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4525 break;
4526 case sizeof(uint64_t):
4527 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4528 break;
4529 default: AssertFailed(); break;
4530 }
4531 }
4532 else
4533 {
4534 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4535 switch (cbSignExtended)
4536 {
4537 case sizeof(uint16_t):
4538 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4539 break;
4540 case sizeof(uint32_t):
4541 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4542 break;
4543 case sizeof(uint64_t):
4544 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4545 break;
4546 default: AssertFailed(); break;
4547 }
4548 }
4549
4550 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4551 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4552 return off;
4553}
4554
4555
4556
4557#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
4558 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
4559
4560#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
4561 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4562
4563#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
4564 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4565
4566/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
4567DECL_INLINE_THROW(uint32_t)
4568iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4569{
4570 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4571 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4572 Assert(iGReg < 16);
4573
4574 /*
4575 * We can either just load the low 16-bit of the GPR into a host register
4576 * for the variable, or we can do so via a shadow copy host register. The
4577 * latter will avoid having to reload it if it's being stored later, but
4578 * will waste a host register if it isn't touched again. Since we don't
4579 * know what's going to happen, we choose the latter for now.
4580 */
4581 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4582 kIemNativeGstRegUse_ReadOnly);
4583
4584 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4585 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4586 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4587 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4588
4589 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4590 return off;
4591}
4592
4593#define IEM_MC_FETCH_GREG_I16(a_i16Dst, a_iGReg) \
4594 off = iemNativeEmitFetchGregI16(pReNative, off, a_i16Dst, a_iGReg)
4595
4596/** Emits code for IEM_MC_FETCH_GREG_I16. */
4597DECL_INLINE_THROW(uint32_t)
4598iemNativeEmitFetchGregI16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4599{
4600 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4601 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(int16_t));
4602 Assert(iGReg < 16);
4603
4604 /*
4605 * We can either just load the low 16-bit of the GPR into a host register
4606 * for the variable, or we can do so via a shadow copy host register. The
4607 * latter will avoid having to reload it if it's being stored later, but
4608 * will waste a host register if it isn't touched again. Since we don't
4609 * know what's going to happen, we choose the latter for now.
4610 */
4611 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4612 kIemNativeGstRegUse_ReadOnly);
4613
4614 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4615 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4616#ifdef RT_ARCH_AMD64
4617 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4618#elif defined(RT_ARCH_ARM64) /* Note! There are no 16-bit registers on ARM; we emulate that through 32-bit registers, which requires sign extension. */
4619 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4620#endif
4621 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4622
4623 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4624 return off;
4625}
4626
4627
4628#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
4629 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4630
4631#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
4632 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4633
4634/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
4635DECL_INLINE_THROW(uint32_t)
4636iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
4637{
4638 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4639 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4640 Assert(iGReg < 16);
4641
4642 /*
4643 * We can either just load the low 16-bit of the GPR into a host register
4644 * for the variable, or we can do so via a shadow copy host register. The
4645 * latter will avoid having to reload it if it's being stored later, but
4646 * will waste a host register if it isn't touched again. Since we don't
4647 * know what's going to happen, we choose the latter for now.
4648 */
4649 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4650 kIemNativeGstRegUse_ReadOnly);
4651
4652 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4653 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4654 if (cbSignExtended == sizeof(uint32_t))
4655 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4656 else
4657 {
4658 Assert(cbSignExtended == sizeof(uint64_t));
4659 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4660 }
4661 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4662
4663 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4664 return off;
4665}
4666
4667
4668#define IEM_MC_FETCH_GREG_I32(a_i32Dst, a_iGReg) \
4669 off = iemNativeEmitFetchGregU32(pReNative, off, a_i32Dst, a_iGReg, sizeof(uint32_t))
4670
4671#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
4672 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
4673
4674#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
4675 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
4676
4677/** Emits code for IEM_MC_FETCH_GREG_I32, IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
4678DECL_INLINE_THROW(uint32_t)
4679iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4680{
4681 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4682 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4683 Assert(iGReg < 16);
4684
4685 /*
4686 * We can either just load the low 32-bit of the GPR into a host register
4687 * for the variable, or we can do so via a shadow copy host register. The
4688 * latter will avoid having to reload it if it's being stored later, but
4689 * will waste a host register if it isn't touched again. Since we don't
4690 * know what's going to happen, we choose the latter for now.
4691 */
4692 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4693 kIemNativeGstRegUse_ReadOnly);
4694
4695 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4696 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4697 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4698 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4699
4700 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4701 return off;
4702}
4703
4704
4705#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
4706 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
4707
4708/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
4709DECL_INLINE_THROW(uint32_t)
4710iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4711{
4712 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4713 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4714 Assert(iGReg < 16);
4715
4716 /*
4717 * We can either just load the low 32-bit of the GPR into a host register
4718 * for the variable, or we can do so via a shadow copy host register. The
4719 * latter will avoid having to reload it if it's being stored later, but
4720 * will waste a host register if it isn't touched again. Since we don't
4721 * know what's going to happen, we choose the latter for now.
4722 */
4723 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4724 kIemNativeGstRegUse_ReadOnly);
4725
4726 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4727 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4728 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4729 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4730
4731 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4732 return off;
4733}
4734
4735
4736#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
4737 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4738
4739#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
4740 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4741
4742/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
4743 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
4744DECL_INLINE_THROW(uint32_t)
4745iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4746{
4747 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4748 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4749 Assert(iGReg < 16);
4750
4751 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4752 kIemNativeGstRegUse_ReadOnly);
4753
4754 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4755 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4756 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
4757 /** @todo name the register a shadow one already? */
4758 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4759
4760 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4761 return off;
4762}
4763
4764
4765#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4766#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
4767 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
4768
4769/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
4770DECL_INLINE_THROW(uint32_t)
4771iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
4772{
4773 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4774 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4775 Assert(iGRegLo < 16 && iGRegHi < 16);
4776
4777 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4778 kIemNativeGstRegUse_ReadOnly);
4779 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4780 kIemNativeGstRegUse_ReadOnly);
4781
4782 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4783 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
4784 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
4785 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
4786
4787 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4788 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4789 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4790 return off;
4791}
4792#endif
4793
4794
4795/*********************************************************************************************************************************
4796* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
4797*********************************************************************************************************************************/
4798
4799#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
4800 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
4801
4802/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
4803DECL_INLINE_THROW(uint32_t)
4804iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
4805{
4806 Assert(iGRegEx < 20);
4807 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4808 kIemNativeGstRegUse_ForUpdate);
4809#ifdef RT_ARCH_AMD64
4810 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4811
4812 /* To the lowest byte of the register: mov r8, imm8 */
4813 if (iGRegEx < 16)
4814 {
4815 if (idxGstTmpReg >= 8)
4816 pbCodeBuf[off++] = X86_OP_REX_B;
4817 else if (idxGstTmpReg >= 4)
4818 pbCodeBuf[off++] = X86_OP_REX;
4819 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4820 pbCodeBuf[off++] = u8Value;
4821 }
4822 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
4823 else if (idxGstTmpReg < 4)
4824 {
4825 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4826 pbCodeBuf[off++] = u8Value;
4827 }
4828 else
4829 {
4830 /* ror reg64, 8 */
4831 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4832 pbCodeBuf[off++] = 0xc1;
4833 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4834 pbCodeBuf[off++] = 8;
4835
4836 /* mov reg8, imm8 */
4837 if (idxGstTmpReg >= 8)
4838 pbCodeBuf[off++] = X86_OP_REX_B;
4839 else if (idxGstTmpReg >= 4)
4840 pbCodeBuf[off++] = X86_OP_REX;
4841 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4842 pbCodeBuf[off++] = u8Value;
4843
4844 /* rol reg64, 8 */
4845 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4846 pbCodeBuf[off++] = 0xc1;
4847 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4848 pbCodeBuf[off++] = 8;
4849 }
4850
4851#elif defined(RT_ARCH_ARM64)
4852 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4853 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4854 if (iGRegEx < 16)
4855 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4856 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4857 else
4858 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4859 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4860 iemNativeRegFreeTmp(pReNative, idxImmReg);
4861
4862#else
4863# error "Port me!"
4864#endif
4865
4866 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4867
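    /* Without delayed register writeback the updated shadow register is flushed
       to CPUMCTX right here; with IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK the
       register was marked dirty by the ForUpdate allocation above and is
       presumably written back later by the allocator. */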
4868#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4869 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4870#endif
4871
4872 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4873 return off;
4874}
4875
4876
4877#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4878 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4879
4880/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4881DECL_INLINE_THROW(uint32_t)
4882iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4883{
4884 Assert(iGRegEx < 20);
4885 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4886
4887 /*
4888 * If it's a constant value (unlikely) we treat this as an
4889 * IEM_MC_STORE_GREG_U8_CONST statement.
4890 */
4891 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4892 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4893 { /* likely */ }
4894 else
4895 {
4896 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4897 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4898 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4899 }
4900
4901 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4902 kIemNativeGstRegUse_ForUpdate);
4903 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4904
4905#ifdef RT_ARCH_AMD64
4906 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4907 if (iGRegEx < 16)
4908 {
4909 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4910 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4911 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4912 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4913 pbCodeBuf[off++] = X86_OP_REX;
4914 pbCodeBuf[off++] = 0x8a;
4915 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4916 }
4917 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
4918 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4919 {
4920 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4921 pbCodeBuf[off++] = 0x8a;
4922 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4923 }
4924 else
4925 {
4926 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4927
4928 /* ror reg64, 8 */
4929 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4930 pbCodeBuf[off++] = 0xc1;
4931 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4932 pbCodeBuf[off++] = 8;
4933
4934 /* mov reg8, reg8(r/m) */
4935 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4936 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4937 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4938 pbCodeBuf[off++] = X86_OP_REX;
4939 pbCodeBuf[off++] = 0x8a;
4940 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4941
4942 /* rol reg64, 8 */
4943 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4944 pbCodeBuf[off++] = 0xc1;
4945 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4946 pbCodeBuf[off++] = 8;
4947 }
4948
4949#elif defined(RT_ARCH_ARM64)
4950 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4951 or
4952 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4953 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4954 if (iGRegEx < 16)
4955 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4956 else
4957 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4958
4959#else
4960# error "Port me!"
4961#endif
4962 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4963
4964 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4965
4966#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4967 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4968#endif
4969 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4970 return off;
4971}
4972
4973
4974
4975#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4976 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4977
4978/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4979DECL_INLINE_THROW(uint32_t)
4980iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4981{
4982 Assert(iGReg < 16);
4983 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4984 kIemNativeGstRegUse_ForUpdate);
4985#ifdef RT_ARCH_AMD64
4986 /* mov reg16, imm16 */
4987 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4988 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4989 if (idxGstTmpReg >= 8)
4990 pbCodeBuf[off++] = X86_OP_REX_B;
4991 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4992 pbCodeBuf[off++] = RT_BYTE1(uValue);
4993 pbCodeBuf[off++] = RT_BYTE2(uValue);
4994
4995#elif defined(RT_ARCH_ARM64)
4996 /* movk xdst, #uValue, lsl #0 */
4997 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4998 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
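    /* MOVK with LSL #0 only replaces bits 15:0 and leaves bits 63:16 of the
       shadow register untouched, matching the x86 semantics of a 16-bit GPR
       store (the upper bits of the guest register are preserved). */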
4999
5000#else
5001# error "Port me!"
5002#endif
5003
5004 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5005
5006#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5007 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5008#endif
5009 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5010 return off;
5011}
5012
5013
5014#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
5015 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
5016
5017/** Emits code for IEM_MC_STORE_GREG_U16. */
5018DECL_INLINE_THROW(uint32_t)
5019iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5020{
5021 Assert(iGReg < 16);
5022 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5023
5024 /*
5025 * If it's a constant value (unlikely) we treat this as a
5026 * IEM_MC_STORE_GREG_U16_CONST statement.
5027 */
5028 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5029 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5030 { /* likely */ }
5031 else
5032 {
5033 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5034 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5035 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
5036 }
5037
5038 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5039 kIemNativeGstRegUse_ForUpdate);
5040
5041#ifdef RT_ARCH_AMD64
5042 /* mov reg16, reg16 or [mem16] */
5043 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
5044 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5045 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5046 {
5047 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
5048 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
5049 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
5050 pbCodeBuf[off++] = 0x8b;
5051 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
5052 }
5053 else
5054 {
5055 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
5056 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
5057 if (idxGstTmpReg >= 8)
5058 pbCodeBuf[off++] = X86_OP_REX_R;
5059 pbCodeBuf[off++] = 0x8b;
5060 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
5061 }
5062
5063#elif defined(RT_ARCH_ARM64)
5064 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
5065 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
5066 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5067 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
5068 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5069
5070#else
5071# error "Port me!"
5072#endif
5073
5074 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5075
5076#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5077 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5078#endif
5079 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5080 return off;
5081}
5082
5083
5084#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
5085 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
5086
5087/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
5088DECL_INLINE_THROW(uint32_t)
5089iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
5090{
5091 Assert(iGReg < 16);
5092 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5093 kIemNativeGstRegUse_ForFullWrite);
5094 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5095#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5096 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5097#endif
5098 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5099 return off;
5100}
5101
5102
5103#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
5104 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
5105
5106#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
5107 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
5108
5109/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
5110DECL_INLINE_THROW(uint32_t)
5111iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5112{
5113 Assert(iGReg < 16);
5114 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5115
5116 /*
5117 * If it's a constant value (unlikely) we treat this as an
5118 * IEM_MC_STORE_GREG_U32_CONST statement.
5119 */
5120 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5121 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5122 { /* likely */ }
5123 else
5124 {
5125 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5126 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5127 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
5128 }
5129
5130 /*
5131 * For the rest we allocate a guest register for the variable and write
5132 * it to the CPUMCTX structure.
5133 */
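    /* The variable's host register is expected to already hold a zero-extended
       32-bit value here (a 32-bit GPR write on x86-64 clears bits 63:32), which
       is what the strict-build check below verifies. */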
5134 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5135#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5136 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5137#else
5138 RT_NOREF(idxVarReg);
5139#endif
5140#ifdef VBOX_STRICT
5141 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
5142#endif
5143 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5144 return off;
5145}
5146
5147
5148#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
5149 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
5150
5151/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
5152DECL_INLINE_THROW(uint32_t)
5153iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
5154{
5155 Assert(iGReg < 16);
5156 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5157 kIemNativeGstRegUse_ForFullWrite);
5158 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5159#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5160 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5161#endif
5162 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5163 return off;
5164}
5165
5166
5167#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
5168 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
5169
5170#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
5171 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
5172
5173/** Emits code for IEM_MC_STORE_GREG_U64/IEM_MC_STORE_GREG_I64. */
5174DECL_INLINE_THROW(uint32_t)
5175iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5176{
5177 Assert(iGReg < 16);
5178 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5179
5180 /*
5181 * If it's a constant value (unlikely) we treat this as an
5182 * IEM_MC_STORE_GREG_U64_CONST statement.
5183 */
5184 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5185 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5186 { /* likely */ }
5187 else
5188 {
5189 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5190 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5191 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
5192 }
5193
5194 /*
5195 * For the rest we allocate a guest register for the variable and write
5196 * it to the CPUMCTX structure.
5197 */
5198 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5199#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5200 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5201#else
5202 RT_NOREF(idxVarReg);
5203#endif
5204 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5205 return off;
5206}
5207
5208
5209#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
5210 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
5211
5212/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
5213DECL_INLINE_THROW(uint32_t)
5214iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
5215{
5216 Assert(iGReg < 16);
5217 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5218 kIemNativeGstRegUse_ForUpdate);
5219 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
5220#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5221 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5222#endif
5223 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5224 return off;
5225}
5226
5227
5228#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5229#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
5230 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
5231
5232/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
5233DECL_INLINE_THROW(uint32_t)
5234iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
5235{
5236 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5237 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
5238 Assert(iGRegLo < 16 && iGRegHi < 16);
5239
5240 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
5241 kIemNativeGstRegUse_ForFullWrite);
5242 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
5243 kIemNativeGstRegUse_ForFullWrite);
5244
5245 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5246 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
5247 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
5248 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
5249
5250 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
5251 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
5252 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
5253 return off;
5254}
5255#endif
5256
5257
5258/*********************************************************************************************************************************
5259* General purpose register manipulation (add, sub). *
5260*********************************************************************************************************************************/
5261
5262#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5263 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5264
5265/** Emits code for IEM_MC_ADD_GREG_U16. */
5266DECL_INLINE_THROW(uint32_t)
5267iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
5268{
5269 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5270 kIemNativeGstRegUse_ForUpdate);
5271
5272#ifdef RT_ARCH_AMD64
5273 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5274 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5275 if (idxGstTmpReg >= 8)
5276 pbCodeBuf[off++] = X86_OP_REX_B;
5277 if (uAddend == 1)
5278 {
5279 pbCodeBuf[off++] = 0xff; /* inc */
5280 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5281 }
5282 else
5283 {
5284 pbCodeBuf[off++] = 0x81;
5285 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5286 pbCodeBuf[off++] = uAddend;
5287 pbCodeBuf[off++] = 0;
5288 }
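    /* In the non-increment branch above the operand-size prefix turns 0x81 /0
       into ADD r/m16, imm16; the two immediate bytes are the little-endian
       16-bit addend, whose high byte is always zero since uAddend is only
       8 bits wide. */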
5289
5290#else
5291 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5292 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5293
5294 /* add tmp, gstgrp, uAddend */
5295 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
5296
5297 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg into idxGstTmpReg. */
5298 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5299
5300 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5301#endif
5302
5303 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5304
5305#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5306 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5307#endif
5308
5309 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5310 return off;
5311}
5312
5313
5314#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
5315 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5316
5317#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
5318 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5319
5320/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
5321DECL_INLINE_THROW(uint32_t)
5322iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
5323{
5324 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5325 kIemNativeGstRegUse_ForUpdate);
5326
5327#ifdef RT_ARCH_AMD64
5328 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5329 if (f64Bit)
5330 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5331 else if (idxGstTmpReg >= 8)
5332 pbCodeBuf[off++] = X86_OP_REX_B;
5333 if (uAddend == 1)
5334 {
5335 pbCodeBuf[off++] = 0xff; /* inc */
5336 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5337 }
5338 else if (uAddend < 128)
5339 {
5340 pbCodeBuf[off++] = 0x83; /* add */
5341 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5342 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5343 }
5344 else
5345 {
5346 pbCodeBuf[off++] = 0x81; /* add */
5347 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5348 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5349 pbCodeBuf[off++] = 0;
5350 pbCodeBuf[off++] = 0;
5351 pbCodeBuf[off++] = 0;
5352 }
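    /* Encoding note: uAddend is an 8-bit constant, so values below 128 fit the
       sign-extended imm8 form (0x83 /0), whereas 128..255 would be sign-extended
       to a negative value and therefore use the imm32 form (0x81 /0), hence the
       three trailing zero immediate bytes above. */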
5353
5354#else
5355 /* add gstgrp, gstgrp, uAddend */
5356 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5357 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
5358
5359#endif
5360
5361 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5362
5363#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5364 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5365#endif
5366
5367 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5368 return off;
5369}
5370
5371
5372
5373#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5374 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5375
5376/** Emits code for IEM_MC_SUB_GREG_U16. */
5377DECL_INLINE_THROW(uint32_t)
5378iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5379{
5380 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5381 kIemNativeGstRegUse_ForUpdate);
5382
5383#ifdef RT_ARCH_AMD64
5384 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5385 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5386 if (idxGstTmpReg >= 8)
5387 pbCodeBuf[off++] = X86_OP_REX_B;
5388 if (uSubtrahend == 1)
5389 {
5390 pbCodeBuf[off++] = 0xff; /* dec */
5391 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5392 }
5393 else
5394 {
5395 pbCodeBuf[off++] = 0x81;
5396 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5397 pbCodeBuf[off++] = uSubtrahend;
5398 pbCodeBuf[off++] = 0;
5399 }
5400
5401#else
5402 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5403 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5404
5405 /* sub tmp, gstgrp, uSubtrahend */
5406 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5407
5408 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg into idxGstTmpReg. */
5409 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5410
5411 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5412#endif
5413
5414 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5415
5416#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5417 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5418#endif
5419
5420 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5421 return off;
5422}
5423
5424
5425#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5426 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5427
5428#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5429 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5430
5431/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5432DECL_INLINE_THROW(uint32_t)
5433iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5434{
5435 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5436 kIemNativeGstRegUse_ForUpdate);
5437
5438#ifdef RT_ARCH_AMD64
5439 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5440 if (f64Bit)
5441 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5442 else if (idxGstTmpReg >= 8)
5443 pbCodeBuf[off++] = X86_OP_REX_B;
5444 if (uSubtrahend == 1)
5445 {
5446 pbCodeBuf[off++] = 0xff; /* dec */
5447 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5448 }
5449 else if (uSubtrahend < 128)
5450 {
5451 pbCodeBuf[off++] = 0x83; /* sub */
5452 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5453 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5454 }
5455 else
5456 {
5457 pbCodeBuf[off++] = 0x81; /* sub */
5458 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5459 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5460 pbCodeBuf[off++] = 0;
5461 pbCodeBuf[off++] = 0;
5462 pbCodeBuf[off++] = 0;
5463 }
5464
5465#else
5466 /* sub tmp, gstgrp, uSubtrahend */
5467 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5468 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
5469
5470#endif
5471
5472 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5473
5474#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5475 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5476#endif
5477
5478 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5479 return off;
5480}
5481
5482
5483#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
5484 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5485
5486#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
5487 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5488
5489#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
5490 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5491
5492#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
5493 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5494
5495/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
5496DECL_INLINE_THROW(uint32_t)
5497iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5498{
5499#ifdef VBOX_STRICT
5500 switch (cbMask)
5501 {
5502 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5503 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5504 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5505 case sizeof(uint64_t): break;
5506 default: AssertFailedBreak();
5507 }
5508#endif
5509
5510 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5511 kIemNativeGstRegUse_ForUpdate);
5512
5513 switch (cbMask)
5514 {
5515 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5516 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
5517 break;
5518 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
5519 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
5520 break;
5521 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5522 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5523 break;
5524 case sizeof(uint64_t):
5525 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
5526 break;
5527 default: AssertFailedBreak();
5528 }
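    /* Example: IEM_MC_AND_GREG_U8(X86_GREG_xCX, 0x0f) ends up AND'ing the shadow
       register with 0xffffffffffffff0f, so only CL is masked while CH and the
       upper 48 bits of RCX are preserved; the 32-bit case deliberately does a
       plain 32-bit AND so that bits 63:32 become zero, mirroring the x86-64
       semantics of a 32-bit register write. */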
5529
5530 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5531
5532#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5533 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5534#endif
5535
5536 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5537 return off;
5538}
5539
5540
5541#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
5542 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5543
5544#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
5545 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5546
5547#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
5548 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5549
5550#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
5551 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5552
5553/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
5554DECL_INLINE_THROW(uint32_t)
5555iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5556{
5557#ifdef VBOX_STRICT
5558 switch (cbMask)
5559 {
5560 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5561 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5562 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5563 case sizeof(uint64_t): break;
5564 default: AssertFailedBreak();
5565 }
5566#endif
5567
5568 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5569 kIemNativeGstRegUse_ForUpdate);
5570
5571 switch (cbMask)
5572 {
5573 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5574 case sizeof(uint16_t):
5575 case sizeof(uint64_t):
5576 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
5577 break;
5578 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5579 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5580 break;
5581 default: AssertFailedBreak();
5582 }
5583
5584 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5585
5586#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5587 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5588#endif
5589
5590 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5591 return off;
5592}
5593
5594
5595/*********************************************************************************************************************************
5596* Local/Argument variable manipulation (add, sub, and, or). *
5597*********************************************************************************************************************************/
5598
5599#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
5600 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5601
5602#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
5603 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5604
5605#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
5606 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5607
5608#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
5609 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5610
5611
5612#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
5613 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
5614
5615#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
5616 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
5617
5618#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
5619 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
5620
5621/** Emits code for AND'ing a local and a constant value. */
5622DECL_INLINE_THROW(uint32_t)
5623iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5624{
5625#ifdef VBOX_STRICT
5626 switch (cbMask)
5627 {
5628 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5629 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5630 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5631 case sizeof(uint64_t): break;
5632 default: AssertFailedBreak();
5633 }
5634#endif
5635
5636 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5637 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5638
5639 if (cbMask <= sizeof(uint32_t))
5640 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
5641 else
5642 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
5643
5644 iemNativeVarRegisterRelease(pReNative, idxVar);
5645 return off;
5646}
5647
5648
5649#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
5650 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5651
5652#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
5653 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5654
5655#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
5656 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5657
5658#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
5659 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5660
5661/** Emits code for OR'ing a local and a constant value. */
5662DECL_INLINE_THROW(uint32_t)
5663iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5664{
5665#ifdef VBOX_STRICT
5666 switch (cbMask)
5667 {
5668 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5669 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5670 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5671 case sizeof(uint64_t): break;
5672 default: AssertFailedBreak();
5673 }
5674#endif
5675
5676 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5677 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5678
5679 if (cbMask <= sizeof(uint32_t))
5680 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
5681 else
5682 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
5683
5684 iemNativeVarRegisterRelease(pReNative, idxVar);
5685 return off;
5686}
5687
5688
5689#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
5690 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
5691
5692#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
5693 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
5694
5695#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
5696 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
5697
5698/** Emits code for reversing the byte order in a local value. */
5699DECL_INLINE_THROW(uint32_t)
5700iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
5701{
5702 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5703 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5704
5705 switch (cbLocal)
5706 {
5707 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
5708 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
5709 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
5710 default: AssertFailedBreak();
5711 }
5712
5713 iemNativeVarRegisterRelease(pReNative, idxVar);
5714 return off;
5715}
5716
5717
5718#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
5719 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5720
5721#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
5722 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5723
5724#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
5725 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5726
5727/** Emits code for shifting left a local value. */
5728DECL_INLINE_THROW(uint32_t)
5729iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5730{
5731#ifdef VBOX_STRICT
5732 switch (cbLocal)
5733 {
5734 case sizeof(uint8_t): Assert(cShift < 8); break;
5735 case sizeof(uint16_t): Assert(cShift < 16); break;
5736 case sizeof(uint32_t): Assert(cShift < 32); break;
5737 case sizeof(uint64_t): Assert(cShift < 64); break;
5738 default: AssertFailedBreak();
5739 }
5740#endif
5741
5742 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5743 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5744
5745 if (cbLocal <= sizeof(uint32_t))
5746 {
5747 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
5748 if (cbLocal < sizeof(uint32_t))
5749 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
5750 cbLocal == sizeof(uint16_t)
5751 ? UINT32_C(0xffff)
5752 : UINT32_C(0xff));
5753 }
5754 else
5755 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
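    /* The extra AND above keeps a shifted 8- or 16-bit local from leaking into
       its upper bits; e.g. a 16-bit local holding 0x8001 shifted left by 1 must
       end up as 0x0002, not 0x00010002. */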
5756
5757 iemNativeVarRegisterRelease(pReNative, idxVar);
5758 return off;
5759}
5760
5761
5762#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
5763 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5764
5765#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
5766 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5767
5768#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
5769 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5770
5771/** Emits code for arithmetically shifting a local value to the right. */
5772DECL_INLINE_THROW(uint32_t)
5773iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5774{
5775#ifdef VBOX_STRICT
5776 switch (cbLocal)
5777 {
5778 case sizeof(int8_t): Assert(cShift < 8); break;
5779 case sizeof(int16_t): Assert(cShift < 16); break;
5780 case sizeof(int32_t): Assert(cShift < 32); break;
5781 case sizeof(int64_t): Assert(cShift < 64); break;
5782 default: AssertFailedBreak();
5783 }
5784#endif
5785
5786 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5787 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5788
5789 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
5790 if (cbLocal == sizeof(uint8_t))
5791 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
5792 else if (cbLocal == sizeof(uint16_t))
5793 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
5794
5795 if (cbLocal <= sizeof(uint32_t))
5796 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
5797 else
5798 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
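    /* Example: a 16-bit local holding 0x8000 (-32768) shifted right by 4 must
       yield 0xf800; sign-extending it to 0xffff8000 first makes the 32-bit
       arithmetic shift produce 0xfffff800, whereas shifting the raw 0x00008000
       would wrongly give 0x00000800. */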
5799
5800 iemNativeVarRegisterRelease(pReNative, idxVar);
5801 return off;
5802}
5803
5804
5805#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
5806 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
5807
5808#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
5809 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
5810
5811#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
5812 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
5813
5814/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
5815DECL_INLINE_THROW(uint32_t)
5816iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
5817{
5818 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
5819 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
5820 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5821 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5822
5823 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5824 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
5825
5826 /* Need to sign extend the value. */
5827 if (cbLocal <= sizeof(uint32_t))
5828 {
5829/** @todo ARM64: In case of boredom, the extended add instruction can do the
5830 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5831 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5832
5833 switch (cbLocal)
5834 {
5835 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5836 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5837 default: AssertFailed();
5838 }
5839
5840 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5841 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5842 }
5843 else
5844 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
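    /* Example: a 16-bit displacement local holding 0xfff0 represents -16, so it
       is widened to 0xfffffffffffffff0 before the 64-bit add, subtracting 16
       from the effective address rather than adding 65520. */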
5845
5846 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5847 iemNativeVarRegisterRelease(pReNative, idxVar);
5848 return off;
5849}
5850
5851
5852
5853/*********************************************************************************************************************************
5854* EFLAGS *
5855*********************************************************************************************************************************/
5856
5857#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5858# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5859#else
5860# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5861 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5862
5863DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5864{
5865 if (fEflOutput)
5866 {
5867 PVMCPUCC const pVCpu = pReNative->pVCpu;
5868# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5869 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5870 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5871 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5872# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5873 if (fEflOutput & (a_fEfl)) \
5874 { \
5875 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5876 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5877 else \
5878 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5879 } else do { } while (0)
5880# else
5881 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5882 IEMLIVENESSBIT const LivenessClobbered = { IEMLIVENESS_STATE_GET_WILL_BE_CLOBBERED_SET(pLivenessEntry) };
5883 IEMLIVENESSBIT const LivenessDelayable = { IEMLIVENESS_STATE_GET_CAN_BE_POSTPONED_SET(pLivenessEntry) };
5884# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5885 if (fEflOutput & (a_fEfl)) \
5886 { \
5887 if (LivenessClobbered.a_fLivenessMember) \
5888 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5889 else if (LivenessDelayable.a_fLivenessMember) \
5890 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5891 else \
5892 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5893 } else do { } while (0)
5894# endif
5895 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5896 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5897 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5898 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5899 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5900 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5901 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5902# undef CHECK_FLAG_AND_UPDATE_STATS
5903 }
5904 RT_NOREF(fEflInput);
5905}
5906#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5907
5908#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5909#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5910 off = iemNativeEmitFetchEFlags<a_fEflInput, iemNativeEflagsToLivenessMask<a_fEflInput>(),\
5911 a_fEflOutput, iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags)
5912
5913/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5914template<uint32_t const a_fEflInput, uint64_t const a_fLivenessEflInput,
5915 uint32_t const a_fEflOutput, uint64_t const a_fLivenessEflOutput>
5916DECL_INLINE_THROW(uint32_t)
5917iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
5918{
5919 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5920 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5921 /** @todo fix NOT AssertCompile(a_fEflInput != 0 || a_fEflOutput != 0); */
5922
5923#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5924# ifdef VBOX_STRICT
5925 if ( pReNative->idxCurCall != 0
5926 && (a_fEflInput != 0 || a_fEflOutput != 0) /* for NOT these are both zero for now. */)
5927 {
5928 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5929 RT_CONSTEXPR uint32_t const fBoth = a_fEflInput | a_fEflOutput;
5930# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5931 AssertMsg( !(fBoth & (a_fElfConst)) \
5932 || (!(a_fEflInput & (a_fElfConst)) \
5933 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5934 : !(a_fEflOutput & (a_fElfConst)) \
5935 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5936 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5937 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5938 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5939 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5940 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5941 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5942 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5943 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5944 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5945# undef ASSERT_ONE_EFL
5946 }
5947# endif
5948#endif
5949
5950 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, a_fEflInput);
5951 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, a_fEflInput);
5952
5953 /** @todo This could be prettier...*/
5954 /** @todo Also, the shadowing+liveness handling of EFlags is currently
5955 * problematic, but I'll try tackle that soon (@bugref{10720}). */
5956 PCIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarEFlags)];
5957 Assert(pVar->enmKind == kIemNativeVarKind_Invalid || pVar->enmKind == kIemNativeVarKind_Stack);
5958 Assert(pVar->idxReg == UINT8_MAX);
5959 if (pVar->uArgNo >= IEMNATIVE_CALL_ARG_GREG_COUNT)
5960 {
5961 /** @todo We could use kIemNativeGstRegUse_ReadOnly here when fOutput is
5962 * zero, but since iemNativeVarRegisterSet clears the shadowing,
5963 * that's counterproductive... */
5964 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ForUpdate,
5965 a_fLivenessEflInput, a_fLivenessEflOutput);
5966 iemNativeVarRegisterSet(pReNative, idxVarEFlags, idxGstReg, off, true /*fAllocated*/);
5967 }
5968 else
5969 {
5970 /* Register argument variable: Avoid assertions in generic call code and load it the traditional way. */
5971 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
5972 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent(pReNative, &off,
5973 a_fLivenessEflInput, a_fLivenessEflOutput);
5974 if (idxGstReg != UINT8_MAX)
5975 {
5976 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstReg);
5977 iemNativeRegFreeTmp(pReNative, idxGstReg);
5978 }
5979 else
5980 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxVarReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5981 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5982 }
5983 return off;
5984}
5985
5986
5987
5988/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5989 * start using it with custom native code emission (inlining assembly
5990 * instruction helpers). */
5991#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5992#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5993 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5994 off = iemNativeEmitCommitEFlags<true /*a_fUpdateSkippingAndPostponing*/, a_fEflOutput, \
5995 iemNativeEflagsToLivenessMask<a_fEflInput>(), \
5996 iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags, a_fEflInput)
5997
5998#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5999#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
6000 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
6001 off = iemNativeEmitCommitEFlags<false /*a_fUpdateSkippingAndPostponing*/, a_fEflOutput, \
6002 iemNativeEflagsToLivenessMask<a_fEflInput>(), \
6003 iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags, a_fEflInput)
6004
6005/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
6006template<bool const a_fUpdateSkippingAndPostponing, uint32_t const a_fEflOutput,
6007 uint64_t const a_fLivenessEflInputBits, uint64_t const a_fLivenessEflOutputBits>
6008DECL_INLINE_THROW(uint32_t)
6009iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflInput)
6010{
6011 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
6012 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
6013
6014#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6015# ifdef VBOX_STRICT
6016 if ( pReNative->idxCurCall != 0
6017 && (a_fLivenessEflInputBits != 0 || a_fLivenessEflOutputBits != 0) /* for NOT these are both zero for now. */)
6018 {
6019 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
6020# define ASSERT_ONE_EFL(a_idxField) \
6021 if RT_CONSTEXPR_IF(((a_fLivenessEflInputBits | a_fLivenessEflOutputBits) & RT_BIT_64(a_idxField)) != 0) \
6022 AssertMsg(!(a_fLivenessEflInputBits & RT_BIT_64(a_idxField)) \
6023 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
6024 : !(a_fLivenessEflOutputBits & RT_BIT_64(a_idxField)) \
6025 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
6026 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)), \
6027 ("%s - %u\n", #a_idxField, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
6028 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
6029 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
6030 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
6031 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
6032 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
6033 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
6034 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
6035# undef ASSERT_ONE_EFL
6036 }
6037# endif
6038#endif
6039
6040#ifdef VBOX_STRICT
6041 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
6042 uint32_t offFixup = off;
6043 off = iemNativeEmitJnzToFixed(pReNative, off, off);
6044 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
6045 iemNativeFixupFixedJump(pReNative, offFixup, off);
6046
6047 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
6048 offFixup = off;
6049 off = iemNativeEmitJzToFixed(pReNative, off, off);
6050 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
6051 iemNativeFixupFixedJump(pReNative, offFixup, off);
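    /* The two strict-build checks above trap architecturally invalid EFLAGS
       values: the 0x2001 breakpoint fires if the must-be-one bit
       (X86_EFL_RA1_MASK, bit 1) is clear, and the 0x2002 breakpoint fires if any
       of the must-be-zero bits in X86_EFL_RAZ_MASK are set in the value being
       committed. */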
6052
6053 /** @todo validate that only bits in the a_fEflOutput mask changed. */
6054#endif
6055
6056#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6057 if RT_CONSTEXPR_IF(a_fUpdateSkippingAndPostponing)
6058 {
6059 Assert(!(pReNative->fSkippingEFlags & fEflInput)); RT_NOREF(fEflInput);
6060 if (pReNative->fSkippingEFlags)
6061 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitCommitEFlags)\n",
6062 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~(a_fEflOutput & X86_EFL_STATUS_BITS) ));
6063 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6064 pReNative->fSkippingEFlags = 0;
6065 else
6066 pReNative->fSkippingEFlags &= ~(a_fEflOutput & X86_EFL_STATUS_BITS);
6067# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6068 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6069 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6070 else
6071 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(a_fEflOutput & X86_EFL_STATUS_BITS),
6072 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6073# endif
6074 IEMNATIVE_CLEAR_POSTPONED_EFLAGS(pReNative, a_fEflOutput);
6075 }
6076#endif
6077
6078 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
6079 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
6080 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
6081 return off;
6082}
6083
6084
6085typedef enum IEMNATIVEMITEFLOP
6086{
6087 kIemNativeEmitEflOp_Set,
6088 kIemNativeEmitEflOp_Clear,
6089 kIemNativeEmitEflOp_Flip
6090} IEMNATIVEMITEFLOP;
6091
6092#define IEM_MC_SET_EFL_BIT(a_fBit) \
6093 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Set, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6094
6095#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
6096 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Clear, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6097
6098#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
6099 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Flip, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6100
6101/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
6102template<IEMNATIVEMITEFLOP const a_enmOp, uint32_t const a_fEflBit, uint64_t const a_fLivenessEflBit>
6103DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6104{
6105 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlags(pReNative, &off, kIemNativeGstRegUse_ForUpdate,
6106 a_enmOp == kIemNativeEmitEflOp_Flip ? a_fLivenessEflBit : 0,
6107 a_fLivenessEflBit);
6108
6109 /* Using 'if constexpr' forces code elimination in debug builds with VC. */
6110 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Set)
6111 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, a_fEflBit);
6112 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Clear)
6113 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~a_fEflBit);
6114 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Flip)
6115 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, a_fEflBit);
6116 else
6117 AssertCompile( a_enmOp == kIemNativeEmitEflOp_Set /* AssertCompile(false) works with VC 2019 but not clang 15. */
6118 || a_enmOp == kIemNativeEmitEflOp_Clear
6119 || a_enmOp == kIemNativeEmitEflOp_Flip);
6120
6121 /** @todo No delayed writeback for EFLAGS right now. */
6122 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
6123
6124 /* Free but don't flush the EFLAGS register. */
6125 iemNativeRegFreeTmp(pReNative, idxEflReg);
6126
6127#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6128 /* Clear the bit in the skipped mask if we're clobbering and it's a status bit. */
6129 if RT_CONSTEXPR_IF( (a_enmOp == kIemNativeEmitEflOp_Set || a_enmOp == kIemNativeEmitEflOp_Clear)
6130 && (a_fEflBit & X86_EFL_STATUS_BITS))
6131 {
6132 if (pReNative->fSkippingEFlags)
6133 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitModifyEFlagsBit)\n",
6134 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~a_fEflBit ));
6135 pReNative->fSkippingEFlags &= ~a_fEflBit;
6136# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6137 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~a_fEflBit, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6138# endif
6139 }
6140#endif
6141
6142 return off;
6143}
6144
6145
6146/*********************************************************************************************************************************
6147* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
6148*********************************************************************************************************************************/
6149
6150#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
6151 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
6152
6153#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
6154 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
6155
6156#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
6157 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
6158
6159
6160/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
6161 * IEM_MC_FETCH_SREG_ZX_U64. */
6162DECL_INLINE_THROW(uint32_t)
6163iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
6164{
6165 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6166 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
6167 Assert(iSReg < X86_SREG_COUNT);
6168
6169 /*
6170     * For now, we will not create a shadow copy of a selector. The rationale
6171     * is that since we do not recompile the popping and loading of segment
6172     * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for
6173     * pushing and moving to registers, there is only a small chance that the
6174     * shadow copy will be accessed again before the register is reloaded. One
6175     * scenario would be nested calls in 16-bit code, but I doubt it's worth
6176     * the extra register pressure atm.
6177     *
6178     * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
6179     * and iemNativeVarRegisterAcquire for a load scenario. We only got the
6180     * store scenario covered at present (r160730).
6181 */
6182 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6183 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6184 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
6185 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6186 return off;
6187}
6188
6189
6190
6191/*********************************************************************************************************************************
6192* Register references. *
6193*********************************************************************************************************************************/
6194
6195#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
6196 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
6197
6198#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
6199 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
6200
6201/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
6202DECL_INLINE_THROW(uint32_t)
6203iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
6204{
6205 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
6206 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6207 Assert(iGRegEx < 20);
6208
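    /* Note: as used by the threaded functions, iGRegEx 0..15 appear to be the
       regular GPRs while 16..19 denote the high-byte registers AH, CH, DH and BH,
       hence the iGRegEx & 15 below. */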
6209 if (iGRegEx < 16)
6210 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6211 else
6212 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
6213
6214 /* If we've delayed writing back the register value, flush it now. */
6215 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6216
6217 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6218 if (!fConst)
6219 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
6220
6221 return off;
6222}
6223
6224#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
6225 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
6226
6227#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
6228 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
6229
6230#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
6231 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
6232
6233#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
6234 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
6235
6236#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
6237 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
6238
6239#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
6240 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
6241
6242#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
6243 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
6244
6245#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
6246 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
6247
6248#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
6249 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
6250
6251#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
6252 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
6253
6254/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
6255DECL_INLINE_THROW(uint32_t)
6256iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
6257{
6258 Assert(iGReg < 16);
6259 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
6260 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6261
6262 /* If we've delayed writing back the register value, flush it now. */
6263 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
6264
6265 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6266 if (!fConst)
6267 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
6268
6269 return off;
6270}
6271
6272
6273#undef IEM_MC_REF_EFLAGS /* should not be used. */
6274#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
6275 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
6276 off = iemNativeEmitRefEFlags<a_fEflOutput>(pReNative, off, a_pEFlags, a_fEflInput)
6277
6278/** Handles IEM_MC_REF_EFLAGS. */
6279template<uint32_t const a_fEflOutput>
6280DECL_INLINE_THROW(uint32_t)
6281iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput)
6282{
6283 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
6284 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6285
6286#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6287 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fEflInput);
6288 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
6289 if (pReNative->fSkippingEFlags)
6290 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitRefEFlags)\n",
6291 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~a_fEflOutput ));
6292 pReNative->fSkippingEFlags &= ~a_fEflOutput;
6293# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6294
6295 /* Updating the skipping according to the outputs is a little early, but
6296 we don't have any other hooks for references atm. */
6297 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6298 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6299 else if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) != 0)
6300 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(a_fEflOutput & X86_EFL_STATUS_BITS),
6301 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6302# endif
6303
6304 /* This ASSUMES that EFLAGS references are not taken before use. */
6305 IEMNATIVE_CLEAR_POSTPONED_EFLAGS(pReNative, a_fEflOutput);
6306
6307#endif
6308 RT_NOREF(fEflInput);
6309
6310 /* If we've delayed writing back the register value, flush it now. */
6311 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
6312
6313 /* If there is a shadow copy of guest EFLAGS, flush it now. */
6314 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
6315
6316 return off;
6317}
6318
6319
6320/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
6321 * different code from the threaded recompiler, maybe it would be helpful. For now
6322 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
6323#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
6324
6325
6326#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
6327 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
6328
6329#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
6330 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
6331
6332#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
6333 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
6334
6335#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
6336 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
6337
6338#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6339/* Just being paranoid here. */
6340# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
6341AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
6342AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
6343AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
6344AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
6345# endif
6346AssertCompileMemberOffset(X86XMMREG, au64, 0);
6347AssertCompileMemberOffset(X86XMMREG, au32, 0);
6348AssertCompileMemberOffset(X86XMMREG, ar64, 0);
6349AssertCompileMemberOffset(X86XMMREG, ar32, 0);
6350
6351# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
6352 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
6353# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
6354 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
6355# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
6356 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
6357# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
6358 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
6359#endif
6360
6361/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
6362DECL_INLINE_THROW(uint32_t)
6363iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
6364{
6365 Assert(iXReg < 16);
6366 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
6367 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6368
6369 /* If we've delayed writing back the register value, flush it now. */
6370 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
6371
6372#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6373 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6374 if (!fConst)
6375 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
6376#else
6377 RT_NOREF(fConst);
6378#endif
6379
6380 return off;
6381}
6382
6383
6384
6385/*********************************************************************************************************************************
6386* Effective Address Calculation *
6387*********************************************************************************************************************************/
6388#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
6389 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
6390
6391/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
6392 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
6393DECL_INLINE_THROW(uint32_t)
6394iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6395 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
6396{
6397 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6398
6399 /*
6400 * Handle the disp16 form with no registers first.
6401 *
6402 * Convert to an immediate value, as that'll delay the register allocation
6403 * and assignment till the memory access / call / whatever and we can use
6404 * a more appropriate register (or none at all).
6405 */
6406 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
6407 {
6408 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
6409 return off;
6410 }
6411
6412    /* Determine the displacement. */
6413 uint16_t u16EffAddr;
6414 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6415 {
6416 case 0: u16EffAddr = 0; break;
6417 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
6418 case 2: u16EffAddr = u16Disp; break;
6419 default: AssertFailedStmt(u16EffAddr = 0);
6420 }
6421
6422 /* Determine the registers involved. */
6423 uint8_t idxGstRegBase;
6424 uint8_t idxGstRegIndex;
6425 switch (bRm & X86_MODRM_RM_MASK)
6426 {
6427 case 0:
6428 idxGstRegBase = X86_GREG_xBX;
6429 idxGstRegIndex = X86_GREG_xSI;
6430 break;
6431 case 1:
6432 idxGstRegBase = X86_GREG_xBX;
6433 idxGstRegIndex = X86_GREG_xDI;
6434 break;
6435 case 2:
6436 idxGstRegBase = X86_GREG_xBP;
6437 idxGstRegIndex = X86_GREG_xSI;
6438 break;
6439 case 3:
6440 idxGstRegBase = X86_GREG_xBP;
6441 idxGstRegIndex = X86_GREG_xDI;
6442 break;
6443 case 4:
6444 idxGstRegBase = X86_GREG_xSI;
6445 idxGstRegIndex = UINT8_MAX;
6446 break;
6447 case 5:
6448 idxGstRegBase = X86_GREG_xDI;
6449 idxGstRegIndex = UINT8_MAX;
6450 break;
6451 case 6:
6452 idxGstRegBase = X86_GREG_xBP;
6453 idxGstRegIndex = UINT8_MAX;
6454 break;
6455#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
6456 default:
6457#endif
6458 case 7:
6459 idxGstRegBase = X86_GREG_xBX;
6460 idxGstRegIndex = UINT8_MAX;
6461 break;
6462 }
6463
6464 /*
6465 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
6466 */
6467 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6468 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6469 kIemNativeGstRegUse_ReadOnly);
6470 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
6471 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6472 kIemNativeGstRegUse_ReadOnly)
6473 : UINT8_MAX;
6474#ifdef RT_ARCH_AMD64
6475 if (idxRegIndex == UINT8_MAX)
6476 {
6477 if (u16EffAddr == 0)
6478 {
6479 /* movxz ret, base */
6480 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
6481 }
6482 else
6483 {
6484 /* lea ret32, [base64 + disp32] */
6485 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6486 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6487 if (idxRegRet >= 8 || idxRegBase >= 8)
6488 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6489 pbCodeBuf[off++] = 0x8d;
6490 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6491 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
6492 else
6493 {
6494 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
6495 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6496 }
6497 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6498 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6499 pbCodeBuf[off++] = 0;
6500 pbCodeBuf[off++] = 0;
6501 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6502
6503 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6504 }
6505 }
6506 else
6507 {
6508 /* lea ret32, [index64 + base64 (+ disp32)] */
6509 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6510 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6511 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6512 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6513 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6514 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6515 pbCodeBuf[off++] = 0x8d;
6516 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
6517 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6518 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
6519 if (bMod == X86_MOD_MEM4)
6520 {
6521 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6522 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6523 pbCodeBuf[off++] = 0;
6524 pbCodeBuf[off++] = 0;
6525 }
6526 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6527 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6528 }
6529
6530#elif defined(RT_ARCH_ARM64)
6531 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6532 if (u16EffAddr == 0)
6533 {
6534 if (idxRegIndex == UINT8_MAX)
6535 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
6536 else
6537 {
6538 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
6539 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6540 }
6541 }
6542 else
6543 {
6544 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
6545 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
6546 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
6547 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6548 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
6549 else
6550 {
6551 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
6552 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6553 }
6554 if (idxRegIndex != UINT8_MAX)
6555 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
6556 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6557 }
6558
6559#else
6560# error "port me"
6561#endif
6562
6563 if (idxRegIndex != UINT8_MAX)
6564 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6565 iemNativeRegFreeTmp(pReNative, idxRegBase);
6566 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6567 return off;
6568}
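
/* Worked example (illustrative only): for 'mov ax, [bp+di+10h]' the decoder
   produces mod=1, rm=3 and u16Disp=0x10, so the code above selects
   idxGstRegBase=xBP and idxGstRegIndex=xDI and ends up emitting the native
   equivalent of (uint16_t)(0x10 + bp + di). */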
6569
6570
6571#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
6572 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
6573
6574/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
6575 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
6576DECL_INLINE_THROW(uint32_t)
6577iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6578 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
6579{
6580 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6581
6582 /*
6583 * Handle the disp32 form with no registers first.
6584 *
6585 * Convert to an immediate value, as that'll delay the register allocation
6586 * and assignment till the memory access / call / whatever and we can use
6587 * a more appropriate register (or none at all).
6588 */
6589 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6590 {
6591 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
6592 return off;
6593 }
6594
6595    /* Calculate the fixed displacement (more on this below for the SIB.B=4 and SIB.B=5 cases). */
6596 uint32_t u32EffAddr = 0;
6597 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6598 {
6599 case 0: break;
6600 case 1: u32EffAddr = (int8_t)u32Disp; break;
6601 case 2: u32EffAddr = u32Disp; break;
6602 default: AssertFailed();
6603 }
6604
6605 /* Get the register (or SIB) value. */
6606 uint8_t idxGstRegBase = UINT8_MAX;
6607 uint8_t idxGstRegIndex = UINT8_MAX;
6608 uint8_t cShiftIndex = 0;
6609 switch (bRm & X86_MODRM_RM_MASK)
6610 {
6611 case 0: idxGstRegBase = X86_GREG_xAX; break;
6612 case 1: idxGstRegBase = X86_GREG_xCX; break;
6613 case 2: idxGstRegBase = X86_GREG_xDX; break;
6614 case 3: idxGstRegBase = X86_GREG_xBX; break;
6615 case 4: /* SIB */
6616 {
6617            /* index w/ scaling. */
6618 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6619 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6620 {
6621 case 0: idxGstRegIndex = X86_GREG_xAX; break;
6622 case 1: idxGstRegIndex = X86_GREG_xCX; break;
6623 case 2: idxGstRegIndex = X86_GREG_xDX; break;
6624 case 3: idxGstRegIndex = X86_GREG_xBX; break;
6625 case 4: cShiftIndex = 0; /*no index*/ break;
6626 case 5: idxGstRegIndex = X86_GREG_xBP; break;
6627 case 6: idxGstRegIndex = X86_GREG_xSI; break;
6628 case 7: idxGstRegIndex = X86_GREG_xDI; break;
6629 }
6630
6631 /* base */
6632 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
6633 {
6634 case 0: idxGstRegBase = X86_GREG_xAX; break;
6635 case 1: idxGstRegBase = X86_GREG_xCX; break;
6636 case 2: idxGstRegBase = X86_GREG_xDX; break;
6637 case 3: idxGstRegBase = X86_GREG_xBX; break;
6638 case 4:
6639 idxGstRegBase = X86_GREG_xSP;
6640 u32EffAddr += uSibAndRspOffset >> 8;
6641 break;
6642 case 5:
6643 if ((bRm & X86_MODRM_MOD_MASK) != 0)
6644 idxGstRegBase = X86_GREG_xBP;
6645 else
6646 {
6647 Assert(u32EffAddr == 0);
6648 u32EffAddr = u32Disp;
6649 }
6650 break;
6651 case 6: idxGstRegBase = X86_GREG_xSI; break;
6652 case 7: idxGstRegBase = X86_GREG_xDI; break;
6653 }
6654 break;
6655 }
6656 case 5: idxGstRegBase = X86_GREG_xBP; break;
6657 case 6: idxGstRegBase = X86_GREG_xSI; break;
6658 case 7: idxGstRegBase = X86_GREG_xDI; break;
6659 }
6660
6661 /*
6662 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6663 * the start of the function.
6664 */
6665 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6666 {
6667 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
6668 return off;
6669 }
6670
6671 /*
6672 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6673 */
6674 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6675 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6676 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6677 kIemNativeGstRegUse_ReadOnly);
6678 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6679 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6680 kIemNativeGstRegUse_ReadOnly);
6681
6682 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6683 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6684 {
6685 idxRegBase = idxRegIndex;
6686 idxRegIndex = UINT8_MAX;
6687 }
6688
6689#ifdef RT_ARCH_AMD64
6690 if (idxRegIndex == UINT8_MAX)
6691 {
6692 if (u32EffAddr == 0)
6693 {
6694 /* mov ret, base */
6695 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6696 }
6697 else
6698 {
6699 /* lea ret32, [base64 + disp32] */
6700 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6701 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6702 if (idxRegRet >= 8 || idxRegBase >= 8)
6703 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6704 pbCodeBuf[off++] = 0x8d;
6705 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6706 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6707 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6708 else
6709 {
6710 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6711 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6712 }
6713 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6714 if (bMod == X86_MOD_MEM4)
6715 {
6716 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6717 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6718 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6719 }
6720 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6721 }
6722 }
6723 else
6724 {
6725 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6726 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6727 if (idxRegBase == UINT8_MAX)
6728 {
6729 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
6730 if (idxRegRet >= 8 || idxRegIndex >= 8)
6731 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6732 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6733 pbCodeBuf[off++] = 0x8d;
6734 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6735 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6736 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6737 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6738 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6739 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6740 }
6741 else
6742 {
6743 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6744 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6745 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6746 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6747 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6748 pbCodeBuf[off++] = 0x8d;
6749 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6750 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6751 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6752 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6753 if (bMod != X86_MOD_MEM0)
6754 {
6755 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6756 if (bMod == X86_MOD_MEM4)
6757 {
6758 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6759 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6760 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6761 }
6762 }
6763 }
6764 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6765 }
6766
6767#elif defined(RT_ARCH_ARM64)
6768 if (u32EffAddr == 0)
6769 {
6770 if (idxRegIndex == UINT8_MAX)
6771 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6772 else if (idxRegBase == UINT8_MAX)
6773 {
6774 if (cShiftIndex == 0)
6775 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
6776 else
6777 {
6778 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6779 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
6780 }
6781 }
6782 else
6783 {
6784 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6785 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6786 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6787 }
6788 }
6789 else
6790 {
6791 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
6792 {
6793 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6794 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
6795 }
6796 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
6797 {
6798 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6799 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6800 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
6801 }
6802 else
6803 {
6804 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
6805 if (idxRegBase != UINT8_MAX)
6806 {
6807 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6808 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6809 }
6810 }
6811 if (idxRegIndex != UINT8_MAX)
6812 {
6813 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6814 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6815 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6816 }
6817 }
6818
6819#else
6820# error "port me"
6821#endif
6822
6823 if (idxRegIndex != UINT8_MAX)
6824 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6825 if (idxRegBase != UINT8_MAX)
6826 iemNativeRegFreeTmp(pReNative, idxRegBase);
6827 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6828 return off;
6829}
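
/* Worked example (illustrative only): for 'mov eax, [ebx+esi*4+8]' the SIB
   byte encodes base=3 (EBX), index=6 (ESI) and scale=2, while mod=1 supplies
   the disp8 of 8; the code above thus allocates read-only copies of EBX and
   ESI and emits the native equivalent of (uint32_t)(8 + ebx + (esi << 2)). */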
6830
6831
6832#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6833 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6834 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6835
6836#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6837 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6838 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6839
6840#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6841 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6842 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
6843
6844/**
6845 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
6846 *
6847 * @returns New off.
6848 * @param   pReNative           The native recompiler state.
6849 * @param   off                 The current offset into the instruction buffer.
6850 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
6851 * bit 4 to REX.X. The two bits are part of the
6852 * REG sub-field, which isn't needed in this
6853 * function.
6854 * @param uSibAndRspOffset Two parts:
6855 * - The first 8 bits make up the SIB byte.
6856 * - The next 8 bits are the fixed RSP/ESP offset
6857 * in case of a pop [xSP].
6858 * @param u32Disp The displacement byte/word/dword, if any.
6859 * @param cbInstr The size of the fully decoded instruction. Used
6860 * for RIP relative addressing.
6861 * @param idxVarRet The result variable number.
6862 * @param f64Bit Whether to use a 64-bit or 32-bit address size
6863 * when calculating the address.
6864 *
6865 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
6866 */
6867DECL_INLINE_THROW(uint32_t)
6868iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
6869 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
6870{
6871 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6872
6873 /*
6874 * Special case the rip + disp32 form first.
6875 */
6876 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6877 {
6878 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6879 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6880 kIemNativeGstRegUse_ReadOnly);
6881 if (f64Bit)
6882 {
6883#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6884 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr + (int64_t)pReNative->Core.offPc;
6885#else
6886 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
6887#endif
6888#ifdef RT_ARCH_AMD64
6889 if ((int32_t)offFinalDisp == offFinalDisp)
6890 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
6891 else
6892 {
6893 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
6894 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
6895 }
6896#else
6897 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6898#endif
6899 }
6900 else
6901 {
6902# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6903 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr + (int32_t)pReNative->Core.offPc;
6904# else
6905 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr;
6906# endif
6907 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6908 }
6909 iemNativeRegFreeTmp(pReNative, idxRegPc);
6910 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6911 return off;
6912 }
6913
6914    /* Calculate the fixed displacement (more on this below for the SIB.B=4 and SIB.B=5 cases). */
6915 int64_t i64EffAddr = 0;
6916 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6917 {
6918 case 0: break;
6919 case 1: i64EffAddr = (int8_t)u32Disp; break;
6920 case 2: i64EffAddr = (int32_t)u32Disp; break;
6921 default: AssertFailed();
6922 }
6923
6924 /* Get the register (or SIB) value. */
6925 uint8_t idxGstRegBase = UINT8_MAX;
6926 uint8_t idxGstRegIndex = UINT8_MAX;
6927 uint8_t cShiftIndex = 0;
6928 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6929 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6930 else /* SIB: */
6931 {
6932        /* index w/ scaling. */
6933 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6934 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6935 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6936 if (idxGstRegIndex == 4)
6937 {
6938 /* no index */
6939 cShiftIndex = 0;
6940 idxGstRegIndex = UINT8_MAX;
6941 }
6942
6943 /* base */
6944 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6945 if (idxGstRegBase == 4)
6946 {
6947 /* pop [rsp] hack */
6948 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6949 }
6950 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6951 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6952 {
6953 /* mod=0 and base=5 -> disp32, no base reg. */
6954 Assert(i64EffAddr == 0);
6955 i64EffAddr = (int32_t)u32Disp;
6956 idxGstRegBase = UINT8_MAX;
6957 }
6958 }
6959
6960 /*
6961 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6962 * the start of the function.
6963 */
6964 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6965 {
6966 if (f64Bit)
6967 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6968 else
6969 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6970 return off;
6971 }
6972
6973 /*
6974 * Now emit code that calculates:
6975 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6976 * or if !f64Bit:
6977 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6978 */
6979 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6980 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6981 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6982 kIemNativeGstRegUse_ReadOnly);
6983 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6984 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6985 kIemNativeGstRegUse_ReadOnly);
6986
6987 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6988 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6989 {
6990 idxRegBase = idxRegIndex;
6991 idxRegIndex = UINT8_MAX;
6992 }
6993
6994#ifdef RT_ARCH_AMD64
6995 uint8_t bFinalAdj;
6996 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6997 bFinalAdj = 0; /* likely */
6998 else
6999 {
7000 /* pop [rsp] with a problematic disp32 value. Split out the
7001 RSP offset and add it separately afterwards (bFinalAdj). */
7002 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
7003 Assert(idxGstRegBase == X86_GREG_xSP);
7004 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
7005 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
7006 Assert(bFinalAdj != 0);
7007 i64EffAddr -= bFinalAdj;
7008 Assert((int32_t)i64EffAddr == i64EffAddr);
7009 }
7010 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
7011//pReNative->pInstrBuf[off++] = 0xcc;
7012
7013 if (idxRegIndex == UINT8_MAX)
7014 {
7015 if (u32EffAddr == 0)
7016 {
7017 /* mov ret, base */
7018 if (f64Bit)
7019 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
7020 else
7021 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
7022 }
7023 else
7024 {
7025 /* lea ret, [base + disp32] */
7026 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
7027 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
7028 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
7029 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7030 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
7031 | (f64Bit ? X86_OP_REX_W : 0);
7032 pbCodeBuf[off++] = 0x8d;
7033 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7034 if (idxRegBase != X86_GREG_x12 /*SIB*/)
7035 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
7036 else
7037 {
7038 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7039 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
7040 }
7041 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7042 if (bMod == X86_MOD_MEM4)
7043 {
7044 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7045 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7046 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7047 }
7048 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7049 }
7050 }
7051 else
7052 {
7053 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
7054 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
7055 if (idxRegBase == UINT8_MAX)
7056 {
7057 /* lea ret, [(index64 << cShiftIndex) + disp32] */
7058 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
7059 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7060 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
7061 | (f64Bit ? X86_OP_REX_W : 0);
7062 pbCodeBuf[off++] = 0x8d;
7063 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
7064 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
7065 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7066 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7067 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7068 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7069 }
7070 else
7071 {
7072 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
7073 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
7074 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7075 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
7076 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
7077 | (f64Bit ? X86_OP_REX_W : 0);
7078 pbCodeBuf[off++] = 0x8d;
7079 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
7080 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7081 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7082 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
7083 if (bMod != X86_MOD_MEM0)
7084 {
7085 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7086 if (bMod == X86_MOD_MEM4)
7087 {
7088 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7089 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7090 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7091 }
7092 }
7093 }
7094 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7095 }
7096
7097 if (!bFinalAdj)
7098 { /* likely */ }
7099 else
7100 {
7101 Assert(f64Bit);
7102 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
7103 }
7104
7105#elif defined(RT_ARCH_ARM64)
7106 if (i64EffAddr == 0)
7107 {
7108 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7109 if (idxRegIndex == UINT8_MAX)
7110 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
7111 else if (idxRegBase != UINT8_MAX)
7112 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
7113 f64Bit, false /*fSetFlags*/, cShiftIndex);
7114 else
7115 {
7116 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
7117 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
7118 }
7119 }
7120 else
7121 {
7122 if (f64Bit)
7123 { /* likely */ }
7124 else
7125 i64EffAddr = (int32_t)i64EffAddr;
7126
7127 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
7128 {
7129 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7130 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
7131 }
7132 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
7133 {
7134 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7135 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
7136 }
7137 else
7138 {
7139 if (f64Bit)
7140 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
7141 else
7142 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
7143 if (idxRegBase != UINT8_MAX)
7144 {
7145 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7146 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
7147 }
7148 }
7149 if (idxRegIndex != UINT8_MAX)
7150 {
7151 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7152 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
7153 f64Bit, false /*fSetFlags*/, cShiftIndex);
7154 }
7155 }
7156
7157#else
7158# error "port me"
7159#endif
7160
7161 if (idxRegIndex != UINT8_MAX)
7162 iemNativeRegFreeTmp(pReNative, idxRegIndex);
7163 if (idxRegBase != UINT8_MAX)
7164 iemNativeRegFreeTmp(pReNative, idxRegBase);
7165 iemNativeVarRegisterRelease(pReNative, idxVarRet);
7166 return off;
7167}
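
/* Illustrative notes: a RIP-relative operand like 'mov rax, [rip+disp32]' hits
   the mod=0/rm=5 special case at the top, where the effective address is the
   guest PC plus disp32 plus the instruction length (plus any delayed PC offset).
   The second byte of uSibAndRspOffset is only expected to be non-zero for a pop
   with an [xSP]-relative operand, where it holds the stack adjustment that has
   to be added because the effective address is defined in terms of the post-pop
   stack pointer. */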
7168
7169
7170/*********************************************************************************************************************************
7171* Memory fetches and stores common *
7172*********************************************************************************************************************************/
7173
7174typedef enum IEMNATIVEMITMEMOP
7175{
7176 kIemNativeEmitMemOp_Store = 0,
7177 kIemNativeEmitMemOp_Fetch,
7178 kIemNativeEmitMemOp_Fetch_Zx_U16,
7179 kIemNativeEmitMemOp_Fetch_Zx_U32,
7180 kIemNativeEmitMemOp_Fetch_Zx_U64,
7181 kIemNativeEmitMemOp_Fetch_Sx_U16,
7182 kIemNativeEmitMemOp_Fetch_Sx_U32,
7183 kIemNativeEmitMemOp_Fetch_Sx_U64
7184} IEMNATIVEMITMEMOP;
7185
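/* Illustrative mapping: a zero-extending 16-bit load into a 32-bit variable
   (IEM_MC_FETCH_MEM_U16_ZX_U32 style) would use kIemNativeEmitMemOp_Fetch_Zx_U32
   with cbMem=2, a sign-extending one into a 64-bit variable would use
   kIemNativeEmitMemOp_Fetch_Sx_U64 with cbMem=2, and plain loads and stores use
   the Fetch/Store values with cbMem giving the access size. */
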
7186/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
7187 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
7188 * (with iSegReg = UINT8_MAX). */
7189/** @todo Pass enmOp, cbMem, fAlignMaskAndCtl and an iSegReg == UINT8_MAX
7190 * indicator as template parameters. */
7191DECL_INLINE_THROW(uint32_t)
7192iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
7193 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAlignMaskAndCtl, IEMNATIVEMITMEMOP enmOp,
7194 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
7195{
7196 /*
7197 * Assert sanity.
7198 */
7199 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7200 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7201 Assert( enmOp != kIemNativeEmitMemOp_Store
7202 || pVarValue->enmKind == kIemNativeVarKind_Immediate
7203 || pVarValue->enmKind == kIemNativeVarKind_Stack);
7204 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7205 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7206 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7207 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7208 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7209 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
7210#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7211 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
7212 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
7213#else
7214 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
7215#endif
7216 Assert(!(fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
7217 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7218#ifdef VBOX_STRICT
7219 if (iSegReg == UINT8_MAX)
7220 {
7221 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7222 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7223 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7224 switch (cbMem)
7225 {
7226 case 1:
7227 Assert( pfnFunction
7228 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
7229 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7230 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7231 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7232 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7233 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
7234 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
7235 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
7236 : UINT64_C(0xc000b000a0009000) ));
7237 Assert(!fAlignMaskAndCtl);
7238 break;
7239 case 2:
7240 Assert( pfnFunction
7241 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
7242 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7243 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7244 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7245 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
7246 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
7247 : UINT64_C(0xc000b000a0009000) ));
7248 Assert(fAlignMaskAndCtl <= 1);
7249 break;
7250 case 4:
7251 Assert( pfnFunction
7252 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
7253 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7254 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7255 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
7256 : UINT64_C(0xc000b000a0009000) ));
7257 Assert(fAlignMaskAndCtl <= 3);
7258 break;
7259 case 8:
7260 Assert( pfnFunction
7261 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
7262 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
7263 : UINT64_C(0xc000b000a0009000) ));
7264 Assert(fAlignMaskAndCtl <= 7);
7265 break;
7266#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7267 case sizeof(RTUINT128U):
7268 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7269 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
7270 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7271 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
7272 || ( enmOp == kIemNativeEmitMemOp_Store
7273 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7274 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
7275 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7276 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7277 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
7278 : fAlignMaskAndCtl <= 15);
7279 break;
7280 case sizeof(RTUINT256U):
7281 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7282 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
7283 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
7284 || ( enmOp == kIemNativeEmitMemOp_Store
7285 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
7286 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
7287 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
7288 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
7289 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
7290 : fAlignMaskAndCtl <= 31);
7291 break;
7292#endif
7293 }
7294 }
7295 else
7296 {
7297 Assert(iSegReg < 6);
7298 switch (cbMem)
7299 {
7300 case 1:
7301 Assert( pfnFunction
7302 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
7303 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
7304 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7305 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7306 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7307 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
7308 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
7309 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
7310 : UINT64_C(0xc000b000a0009000) ));
7311 Assert(!fAlignMaskAndCtl);
7312 break;
7313 case 2:
7314 Assert( pfnFunction
7315 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
7316 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
7317 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7318 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7319 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
7320 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
7321 : UINT64_C(0xc000b000a0009000) ));
7322 Assert(fAlignMaskAndCtl <= 1);
7323 break;
7324 case 4:
7325 Assert( pfnFunction
7326 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
7327 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
7328 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
7329 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
7330 : UINT64_C(0xc000b000a0009000) ));
7331 Assert(fAlignMaskAndCtl <= 3);
7332 break;
7333 case 8:
7334 Assert( pfnFunction
7335 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
7336 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
7337 : UINT64_C(0xc000b000a0009000) ));
7338 Assert(fAlignMaskAndCtl <= 7);
7339 break;
7340#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7341 case sizeof(RTUINT128U):
7342 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7343 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
7344 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7345 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
7346 || ( enmOp == kIemNativeEmitMemOp_Store
7347 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7348 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
7349 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7350 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7351 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
7352 : fAlignMaskAndCtl <= 15);
7353 break;
7354 case sizeof(RTUINT256U):
7355 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7356 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
7357 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
7358 || ( enmOp == kIemNativeEmitMemOp_Store
7359 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
7360 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
7361 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
7362 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
7363 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
7364 : fAlignMaskAndCtl <= 31);
7365 break;
7366#endif
7367 }
7368 }
7369#endif
7370
7371#ifdef VBOX_STRICT
7372 /*
7373 * Check that the fExec flags we've got make sense.
7374 */
7375 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7376#endif
7377
7378 /*
7379 * To keep things simple we have to commit any pending writes first as we
7380 * may end up making calls.
7381 */
7382 /** @todo we could postpone this till we make the call and reload the
7383 * registers after returning from the call. Not sure if that's sensible or
7384 * not, though. */
7385#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7386 off = iemNativeRegFlushPendingWrites(pReNative, off);
7387#else
7388 /* The program counter is treated differently for now. */
7389 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
7390#endif
7391
7392#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7393 /*
7394 * Move/spill/flush stuff out of call-volatile registers.
7395 * This is the easy way out. We could contain this to the tlb-miss branch
7396 * by saving and restoring active stuff here.
7397 */
7398 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7399#endif
7400
7401 /*
7402 * Define labels and allocate the result register (trying for the return
7403 * register if we can).
7404 */
7405 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7406#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7407 uint8_t idxRegValueFetch = UINT8_MAX;
7408
7409 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7410 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7411 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
7412 else
7413 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7414 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7415 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7416 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7417#else
7418 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7419 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7420 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7421 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7422#endif
7423 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
7424
7425#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7426 uint8_t idxRegValueStore = UINT8_MAX;
7427
7428 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7429 idxRegValueStore = !TlbState.fSkip
7430 && enmOp == kIemNativeEmitMemOp_Store
7431 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7432 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7433 : UINT8_MAX;
7434 else
7435 idxRegValueStore = !TlbState.fSkip
7436 && enmOp == kIemNativeEmitMemOp_Store
7437 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7438 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7439 : UINT8_MAX;
7440
7441#else
7442 uint8_t const idxRegValueStore = !TlbState.fSkip
7443 && enmOp == kIemNativeEmitMemOp_Store
7444 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7445 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7446 : UINT8_MAX;
7447#endif
7448 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7449 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7450 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7451 : UINT32_MAX;
7452
7453 /*
7454 * Jump to the TLB lookup code.
7455 */
7456 if (!TlbState.fSkip)
7457 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7458
7459 /*
7460 * TlbMiss:
7461 *
7462 * Call helper to do the fetching.
7463 * We flush all guest register shadow copies here.
7464 */
7465 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7466
7467#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7468 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7469#else
7470 RT_NOREF(idxInstr);
7471#endif
7472
7473#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7474 if (pReNative->Core.offPc)
7475 {
7476 /*
7477 * Update the program counter but restore it at the end of the TlbMiss branch.
7478 * This should allow delaying more program counter updates for the TlbLookup and hit paths
7479 * which are hopefully much more frequent, reducing the number of memory accesses.
7480 */
7481 /* Allocate a temporary PC register. */
7482/** @todo r=bird: This would technically need to be done up front as it's a register allocation. */
7483 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7484 kIemNativeGstRegUse_ForUpdate);
7485
7486 /* Perform the addition and store the result. */
7487 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7488 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7489# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
7490 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
7491# endif
7492
7493 /* Free and flush the PC register. */
7494 iemNativeRegFreeTmp(pReNative, idxPcReg);
7495 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7496 }
7497#endif
7498
7499#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7500 /* Save variables in volatile registers. */
7501 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7502 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
7503 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
7504 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7505#endif
7506
7507 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
7508 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
7509#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7510 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7511 {
7512 /*
7513 * For SIMD based variables we pass a reference to the value's stack copy for both fetches and stores.
7514 *
7515 * Note! A host register was assigned to the variable for the TlbLookup case above. It must not be
7516 * freed here, or the value loaded into that register will not be synced with the variable's stack
7517 * slot further down the road, because the variable would no longer know it has a register assigned.
7518 *
7519 * Note! For loads it is not required to sync what is in the assigned register with the stack slot
7520 * as it will be overwritten anyway.
7521 */
7522 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7523 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
7524 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
7525 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7526 }
7527 else
7528#endif
7529 if (enmOp == kIemNativeEmitMemOp_Store)
7530 {
7531 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7532 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*offAddend*/,
7533#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7534 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7535#else
7536 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7537 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7538#endif
7539 }
7540
7541 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
7542 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*offAddend*/,
7543#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7544 fVolGregMask);
7545#else
7546 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
7547#endif
7548
7549 if (iSegReg != UINT8_MAX)
7550 {
7551 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
7552 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7553 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
7554 }
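/* Helper call argument summary at this point: ARG1 = GCPtrMem (plus any displacement), ARG2 =
   iSegReg for segmented accesses or the value for flat stores, ARG3 = the value for segmented
   stores; SIMD values are passed as a pointer to their stack copy instead. ARG0 = pVCpu is
   loaded just before the call below. */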
7555
7556#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
7557 /* Do delayed EFLAGS calculations. */
7558 if (enmOp == kIemNativeEmitMemOp_Store || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7559 {
7560 if (iSegReg == UINT8_MAX)
7561 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7562 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
7563 fHstRegsNotToSave);
7564 else
7565 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7566 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
7567 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG)>(pReNative, off, &TlbState,
7568 fHstRegsNotToSave);
7569 }
7570 else if (iSegReg == UINT8_MAX)
7571 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState,
7572 fHstRegsNotToSave);
7573 else
7574 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7575 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
7576 fHstRegsNotToSave);
7577#endif
7578
7579 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7580 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7581
7582 /* Done setting up parameters, make the call. */
7583 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
7584
7585 /*
7586 * Put the result in the right register if this is a fetch.
7587 */
7588 if (enmOp != kIemNativeEmitMemOp_Store)
7589 {
7590#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7591 if ( cbMem == sizeof(RTUINT128U)
7592 || cbMem == sizeof(RTUINT256U))
7593 {
7594 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
7595
7596 /* Sync the value on the stack with the host register assigned to the variable. */
7597 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
7598 }
7599 else
7600#endif
7601 {
7602 Assert(idxRegValueFetch == pVarValue->idxReg);
7603 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
7604 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
7605 }
7606 }
7607
7608#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7609 /* Restore variables and guest shadow registers to volatile registers. */
7610 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7611 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7612#endif
7613
7614#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7615 if (pReNative->Core.offPc)
7616 {
7617 /*
7618 * Time to restore the program counter to its original value.
7619 */
7620 /* Allocate a temporary PC register. */
7621 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7622 kIemNativeGstRegUse_ForUpdate);
7623
7624 /* Restore the original value. */
7625 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7626 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7627
7628 /* Free and flush the PC register. */
7629 iemNativeRegFreeTmp(pReNative, idxPcReg);
7630 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7631 }
7632#endif
7633
7634#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7635 if (!TlbState.fSkip)
7636 {
7637 /* end of TlbMiss - Jump to the done label. */
7638 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7639 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7640
7641 /*
7642 * TlbLookup:
7643 */
7644 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl,
7645 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
7646 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
7647
7648 /*
7649 * Emit code to do the actual storing / fetching.
7650 */
7651 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7652# ifdef IEM_WITH_TLB_STATISTICS
7653 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7654 enmOp == kIemNativeEmitMemOp_Store
7655 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
7656 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
7657# endif
7658 switch (enmOp)
7659 {
7660 case kIemNativeEmitMemOp_Store:
7661 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
7662 {
7663 switch (cbMem)
7664 {
7665 case 1:
7666 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7667 break;
7668 case 2:
7669 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7670 break;
7671 case 4:
7672 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7673 break;
7674 case 8:
7675 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7676 break;
7677#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7678 case sizeof(RTUINT128U):
7679 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7680 break;
7681 case sizeof(RTUINT256U):
7682 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7683 break;
7684#endif
7685 default:
7686 AssertFailed();
7687 }
7688 }
7689 else
7690 {
7691 switch (cbMem)
7692 {
7693 case 1:
7694 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
7695 idxRegMemResult, TlbState.idxReg1);
7696 break;
7697 case 2:
7698 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7699 idxRegMemResult, TlbState.idxReg1);
7700 break;
7701 case 4:
7702 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7703 idxRegMemResult, TlbState.idxReg1);
7704 break;
7705 case 8:
7706 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
7707 idxRegMemResult, TlbState.idxReg1);
7708 break;
7709 default:
7710 AssertFailed();
7711 }
7712 }
7713 break;
7714
7715 case kIemNativeEmitMemOp_Fetch:
7716 case kIemNativeEmitMemOp_Fetch_Zx_U16:
7717 case kIemNativeEmitMemOp_Fetch_Zx_U32:
7718 case kIemNativeEmitMemOp_Fetch_Zx_U64:
7719 switch (cbMem)
7720 {
7721 case 1:
7722 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7723 break;
7724 case 2:
7725 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7726 break;
7727 case 4:
7728 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7729 break;
7730 case 8:
7731 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7732 break;
7733#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7734 case sizeof(RTUINT128U):
7735 /*
7736 * No need to sync back the register with the stack, this is done by the generic variable handling
7737 * code if there is a register assigned to a variable and the stack must be accessed.
7738 */
7739 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7740 break;
7741 case sizeof(RTUINT256U):
7742 /*
7743 * No need to sync back the register with the stack, this is done by the generic variable handling
7744 * code if there is a register assigned to a variable and the stack must be accessed.
7745 */
7746 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7747 break;
7748#endif
7749 default:
7750 AssertFailed();
7751 }
7752 break;
7753
7754 case kIemNativeEmitMemOp_Fetch_Sx_U16:
7755 Assert(cbMem == 1);
7756 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7757 break;
7758
7759 case kIemNativeEmitMemOp_Fetch_Sx_U32:
7760 Assert(cbMem == 1 || cbMem == 2);
7761 if (cbMem == 1)
7762 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7763 else
7764 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7765 break;
7766
7767 case kIemNativeEmitMemOp_Fetch_Sx_U64:
7768 switch (cbMem)
7769 {
7770 case 1:
7771 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7772 break;
7773 case 2:
7774 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7775 break;
7776 case 4:
7777 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7778 break;
7779 default:
7780 AssertFailed();
7781 }
7782 break;
7783
7784 default:
7785 AssertFailed();
7786 }
7787
7788 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7789
7790 /*
7791 * TlbDone:
7792 */
7793 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7794
7795 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7796
7797# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7798 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7799 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7800# endif
7801 }
7802#else
7803 RT_NOREF(fAlignMaskAndCtl, idxLabelTlbMiss);
7804#endif
7805
7806 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
7807 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7808 return off;
7809}
7810
7811
7812
7813/*********************************************************************************************************************************
7814* Memory fetches (IEM_MEM_FETCH_XXX). *
7815*********************************************************************************************************************************/
7816
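/*
 * The IEM_MC_FETCH_MEM_XXX overrides below all forward to iemNativeEmitMemFetchStoreDataCommon,
 * passing the destination variable, the segment register (UINT8_MAX for the FLAT variants), the
 * guest pointer variable, the access size, an alignment mask (size - 1 for naturally aligned
 * accesses, 0 for bytes), the fetch/zero-extend/sign-extend operation and the helper used on the
 * TlbMiss path. E.g. an IEM_MC_FETCH_MEM_U16(u16Dst, X86_SREG_DS, GCPtrEffSrc) statement in a MC
 * block (illustrative operand names) becomes a 2-byte fetch with alignment mask 1, falling back
 * on iemNativeHlpMemFetchDataU16 when the inline TLB lookup misses.
 */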
7817/* 8-bit segmented: */
7818#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
7819 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
7820 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7821 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7822
7823#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7824 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7825 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7826 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7827
7828#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7829 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7830 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7831 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7832
7833#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7834 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7835 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7836 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7837
7838#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7839 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7840 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7841 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7842
7843#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7844 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7845 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7846 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7847
7848#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7849 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7850 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7851 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7852
7853/* 16-bit segmented: */
7854#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7855 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7856 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7857 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7858
7859#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7860 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7861 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7862 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7863
7864#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7865 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7866 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7867 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7868
7869#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7870 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7871 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7872 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7873
7874#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7875 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7876 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7877 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7878
7879#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7880 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7881 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7882 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7883
7884
7885/* 32-bit segmented: */
7886#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7887 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7888 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7889 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7890
7891#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7892 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7893 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7894 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7895
7896#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7897 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7898 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7899 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7900
7901#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7902 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7903 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7904 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7905
7906#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
7907 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
7908 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7909 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7910
7911#define IEM_MC_FETCH_MEM_I16_DISP(a_i16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7912 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
7913 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7914 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7915
7916#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
7917 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7918 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7919 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7920
7921#define IEM_MC_FETCH_MEM_I32_DISP(a_i32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7922 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7923 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7924 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7925
7926#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
7927 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
7928 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7929 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7930
7931AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
7932#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
7933 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
7934 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7935 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7936
7937
7938/* 64-bit segmented: */
7939#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7940 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7941 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7942 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7943
7944AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
7945#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
7946 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
7947 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7948 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
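/* Note: the RTFLOAT32U / RTFLOAT64U fetches above reuse the U32 / U64 integer helpers; the
   AssertCompileSize statements guard the assumption that the sizes match. */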
7949
7950
7951/* 8-bit flat: */
7952#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7953 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
7954 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7955 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7956
7957#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7958 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7959 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7960 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7961
7962#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7963 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7964 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7965 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7966
7967#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7968 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7969 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7970 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7971
7972#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7973 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7974 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7975 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7976
7977#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7978 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7979 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7980 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7981
7982#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7983 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7984 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7985 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7986
7987
7988/* 16-bit flat: */
7989#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7990 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7991 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7992 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7993
7994#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7995 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7996 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7997 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7998
7999#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
8000 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
8001 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
8002 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
8003
8004#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
8005 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
8006 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
8007 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
8008
8009#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
8010 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
8011 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
8012 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
8013
8014#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
8015 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
8016 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
8017 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
8018
8019/* 32-bit flat: */
8020#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
8021 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
8022 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
8023 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
8024
8025#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
8026 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
8027 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
8028 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
8029
8030#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
8031 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
8032 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
8033 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
8034
8035#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
8036 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
8037 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
8038 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
8039
8040#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
8041 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
8042 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
8043 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
8044
8045#define IEM_MC_FETCH_MEM_FLAT_I16_DISP(a_i16Dst, a_GCPtrMem, a_offDisp) \
8046 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
8047 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
8048 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
8049
8050#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
8051 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
8052 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
8053 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
8054
8055#define IEM_MC_FETCH_MEM_FLAT_I32_DISP(a_i32Dst, a_GCPtrMem, a_offDisp) \
8056 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
8057 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
8058 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
8059
8060#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
8061 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
8062 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
8063 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
8064
8065#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
8066 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
8067 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
8068 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
8069
8070
8071/* 64-bit flat: */
8072#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
8073 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
8074 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
8075 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
8076
8077#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
8078 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
8079 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
8080 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
8081
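/*
 * The 128-bit and 256-bit variants below are only emitted natively when the SIMD register
 * allocator is available. The ALIGN_SSE / ALIGN_AVX variants OR extra alignment-control flags
 * (IEM_MEMMAP_F_ALIGN_GP, plus IEM_MEMMAP_F_ALIGN_SSE for the SSE ones) into the fAlignMaskAndCtl
 * argument, whereas the NO_AC variants keep the plain size - 1 mask.
 */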
8082#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8083/* 128-bit segmented: */
8084#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
8085 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
8086 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
8087 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
8088
8089#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
8090 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8091 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8092 kIemNativeEmitMemOp_Fetch, \
8093 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
8094
8095AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
8096#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
8097 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, sizeof(X86XMMREG), \
8098 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8099 kIemNativeEmitMemOp_Fetch, \
8100 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
8101
8102#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
8103 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
8104 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
8105 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
8106
8107#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
8108 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
8109 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
8110 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
8111
8112
8113/* 128-bit flat: */
8114#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
8115 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
8116 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
8117 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
8118
8119#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
8120 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8121 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8122 kIemNativeEmitMemOp_Fetch, \
8123 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
8124
8125#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
8126 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, sizeof(X86XMMREG), \
8127 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8128 kIemNativeEmitMemOp_Fetch, \
8129 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
8130
8131#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
8132 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
8133 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
8134 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
8135
8136#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
8137 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
8138 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
8139 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
8140
8141/* 256-bit segmented: */
8142#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
8143 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
8144 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
8145 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8146
8147#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
8148 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
8149 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
8150 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8151
8152#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
8153 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
8154 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
8155 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8156
8157#define IEM_MC_FETCH_MEM_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
8158 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
8159 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
8160 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8161
8162
8163/* 256-bit flat: */
8164#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
8165 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
8166 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
8167 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8168
8169#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
8170 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
8171 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
8172 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8173
8174#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
8175 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
8176 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
8177 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8178
8179#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
8180 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, \
8181 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
8182 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8183
8184#endif
8185
8186
8187/*********************************************************************************************************************************
8188* Memory stores (IEM_MEM_STORE_XXX). *
8189*********************************************************************************************************************************/
8190
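/*
 * The store overrides mirror the fetch ones above: value variable, segment register (UINT8_MAX
 * for the FLAT variants), pointer variable, size, alignment mask and TlbMiss helper are forwarded
 * to iemNativeEmitMemFetchStoreDataCommon with kIemNativeEmitMemOp_Store. The *_CONST variants
 * go through iemNativeEmitMemStoreConstDataCommon, which wraps the immediate in a temporary
 * const variable first (see below).
 */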
8191#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
8192 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
8193 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
8194 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8195
8196#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
8197 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
8198 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
8199 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8200
8201#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
8202 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
8203 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
8204 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8205
8206#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
8207 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
8208 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
8209 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8210
8211
8212#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
8213 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
8214 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
8215 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8216
8217#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
8218 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
8219 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
8220 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8221
8222#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
8223 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
8224 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
8225 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8226
8227#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
8228 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
8229 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
8230 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8231
8232
8233#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
8234 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8235 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8236
8237#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
8238 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8239 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8240
8241#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
8242 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8243 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8244
8245#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
8246 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8247 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8248
8249
8250#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
8251 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8252 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8253
8254#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
8255 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8256 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8257
8258#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
8259 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8260 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8261
8262#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
8263 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8264 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8265
8266/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
8267 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
8268DECL_INLINE_THROW(uint32_t)
8269iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
8270 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
8271{
8272 /*
8273 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
8274 * to do the grunt work.
8275 */
8276 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
8277 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
8278 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
8279 pfnFunction, idxInstr);
8280 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
8281 return off;
8282}
8283
8284
8285#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8286# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
8287 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8288 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8289 kIemNativeEmitMemOp_Store, \
8290 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
8291
8292# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
8293 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
8294 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
8295 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
8296
8297# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
8298 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
8299 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
8300 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
8301
8302# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
8303 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
8304 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8305 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8306
8307
8308# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
8309 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8310 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8311 kIemNativeEmitMemOp_Store, \
8312 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
8313
8314# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
8315 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
8316 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
8317 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
8318
8319# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
8320 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
8321 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
8322 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
8323
8324# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
8325 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
8326 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8327 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8328#endif
8329
8330
8331
8332/*********************************************************************************************************************************
8333* Stack Accesses. *
8334*********************************************************************************************************************************/
8335/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
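/* Byte 0 is the width of the pushed value in bits, byte 1 the flat stack-pointer width (0 =
   segmented stack, 32 or 64 = flat) and byte 2 is set when a segment-register value is pushed
   (which needs the Intel PUSH sreg quirk handling). E.g. RT_MAKE_U32_FROM_U8(16, 64, 0, 0)
   encodes a 16-bit push with a flat 64-bit stack. */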
8336#define IEM_MC_PUSH_U16(a_u16Value) \
8337 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
8338 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
8339#define IEM_MC_PUSH_U32(a_u32Value) \
8340 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
8341 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
8342#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
8343 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
8344 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
8345#define IEM_MC_PUSH_U64(a_u64Value) \
8346 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
8347 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
8348
8349#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
8350 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
8351 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8352#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
8353 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
8354 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
8355#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
8356 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
8357 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
8358
8359#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
8360 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
8361 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8362#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
8363 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
8364 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
8365
8366
8367/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
8368DECL_INLINE_THROW(uint32_t)
8369iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
8370 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
8371{
8372 /*
8373 * Assert sanity.
8374 */
8375 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
8376 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
8377#ifdef VBOX_STRICT
8378 if (RT_BYTE2(cBitsVarAndFlat) != 0)
8379 {
8380 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8381 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8382 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8383 Assert( pfnFunction
8384 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8385 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
8386 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
8387 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8388 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
8389 : UINT64_C(0xc000b000a0009000) ));
8390 }
8391 else
8392 Assert( pfnFunction
8393 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
8394 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
8395 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
8396 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
8397 : UINT64_C(0xc000b000a0009000) ));
8398#endif
8399
8400#ifdef VBOX_STRICT
8401 /*
8402 * Check that the fExec flags we've got make sense.
8403 */
8404 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8405#endif
8406
8407 /*
8408 * To keep things simple we have to commit any pending writes first as we
8409 * may end up making calls.
8410 */
8411 /** @todo we could postpone this till we make the call and reload the
8412 * registers after returning from the call. Not sure if that's sensible or
8413 * not, though. */
8414 off = iemNativeRegFlushPendingWrites(pReNative, off);
8415
8416 /*
8417 * First we calculate the new RSP and the effective stack pointer value.
8418 * For 64-bit mode and flat 32-bit these two are the same.
8419 * (Code structure is very similar to that of PUSH)
8420 */
8421 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
8422 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
8423 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
8424 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
8425 ? cbMem : sizeof(uint16_t);
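/* The emitted store only writes 16 bits for a segment-register push on Intel guest CPUs outside
   16-bit mode, leaving the rest of the stack slot untouched; in 16-bit mode the full operand
   size is kept and the 32-bit operand case gets the EFLAGS-in-the-upper-half treatment in the
   TlbLookup code further down. */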
8426 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
8427 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8428 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8429 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8430 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8431 if (cBitsFlat != 0)
8432 {
8433 Assert(idxRegEffSp == idxRegRsp);
8434 Assert(cBitsFlat == 32 || cBitsFlat == 64);
8435 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8436 if (cBitsFlat == 64)
8437 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
8438 else
8439 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
8440 }
8441 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8442 {
8443 Assert(idxRegEffSp != idxRegRsp);
8444 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8445 kIemNativeGstRegUse_ReadOnly);
8446#ifdef RT_ARCH_AMD64
8447 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8448#else
8449 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8450#endif
8451 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8452 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8453 offFixupJumpToUseOtherBitSp = off;
8454 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8455 {
8456 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8457 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8458 }
8459 else
8460 {
8461 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8462 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8463 }
8464 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8465 }
8466 /* SpUpdateEnd: */
8467 uint32_t const offLabelSpUpdateEnd = off;
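/* For the segmented case the conditional jump emitted above is patched (iemNativeFixupFixedJump)
   to land on the alternative stack-pointer update block emitted after the TLB dispatch jump
   below, which then jumps back here to SpUpdateEnd. */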
8468
8469 /*
8470 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8471 * we're skipping lookup).
8472 */
8473 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8474 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
8475 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8476 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8477 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8478 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8479 : UINT32_MAX;
8480 uint8_t const idxRegValue = !TlbState.fSkip
8481 && pVarValue->enmKind != kIemNativeVarKind_Immediate
8482 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
8483 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
8484 : UINT8_MAX;
8485 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
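/* The value variable is preferably acquired in IEMNATIVE_CALL_ARG2_GREG so the TlbMiss path
   below usually does not have to move it; the swap / ordering logic in the TlbMiss code handles
   the cases where it ended up elsewhere or collides with the effective SP register. */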
8486
8487
8488 if (!TlbState.fSkip)
8489 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8490 else
8491 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8492
8493 /*
8494 * Use16BitSp:
8495 */
8496 if (cBitsFlat == 0)
8497 {
8498#ifdef RT_ARCH_AMD64
8499 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8500#else
8501 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8502#endif
8503 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8504 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8505 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8506 else
8507 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8508 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8509 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8510 }
8511
8512 /*
8513 * TlbMiss:
8514 *
8515 * Call helper to do the pushing.
8516 */
8517 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8518
8519#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8520 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8521#else
8522 RT_NOREF(idxInstr);
8523#endif
8524
8525 /* Save variables in volatile registers. */
8526 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8527 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8528 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
8529 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
8530 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8531
8532 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
8533 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
8534 {
8535 /* Swap them using ARG0 as temp register: */
8536 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
8537 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
8538 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
8539 }
8540 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
8541 {
8542 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
8543 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
8544 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8545
8546 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
8547 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8548 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8549 }
8550 else
8551 {
8552 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
8553 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8554
8555 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
8556 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
8557 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
8558 }
8559
8560#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8561 /* Do delayed EFLAGS calculations. */
8562 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
8563 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
8564#endif
8565
8566 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8567 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8568
8569 /* Done setting up parameters, make the call. */
8570 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
8571
8572 /* Restore variables and guest shadow registers to volatile registers. */
8573 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8574 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8575
8576#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8577 if (!TlbState.fSkip)
8578 {
8579 /* end of TlbMiss - Jump to the done label. */
8580 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8581 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8582
8583 /*
8584 * TlbLookup:
8585 */
8586 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
8587 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8588
8589 /*
8590 * Emit code to do the actual storing / fetching.
8591 */
8592 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
8593# ifdef IEM_WITH_TLB_STATISTICS
8594 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8595 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8596# endif
8597 if (idxRegValue != UINT8_MAX)
8598 {
8599 switch (cbMemAccess)
8600 {
8601 case 2:
8602 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8603 break;
8604 case 4:
8605 if (!fIsIntelSeg)
8606 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8607 else
8608 {
8609                            /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
8610                               PUSH FS in real mode, so we have to try to emulate that here.
8611 We borrow the now unused idxReg1 from the TLB lookup code here. */
8612 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
8613 kIemNativeGstReg_EFlags);
8614 if (idxRegEfl != UINT8_MAX)
8615 {
8616#ifdef RT_ARCH_AMD64
8617 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
8618 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8619 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8620#else
8621 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
8622 off, TlbState.idxReg1, idxRegEfl,
8623 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8624#endif
8625 iemNativeRegFreeTmp(pReNative, idxRegEfl);
8626 }
8627 else
8628 {
8629 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
8630 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
8631 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8632 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8633 }
8634 /* ASSUMES the upper half of idxRegValue is ZERO. */
8635 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
8636 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
8637 }
8638 break;
8639 case 8:
8640 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8641 break;
8642 default:
8643 AssertFailed();
8644 }
8645 }
8646 else
8647 {
8648 switch (cbMemAccess)
8649 {
8650 case 2:
8651 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
8652 idxRegMemResult, TlbState.idxReg1);
8653 break;
8654 case 4:
8655 Assert(!fIsSegReg);
8656 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
8657 idxRegMemResult, TlbState.idxReg1);
8658 break;
8659 case 8:
8660 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
8661 break;
8662 default:
8663 AssertFailed();
8664 }
8665 }
8666
8667 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8668 TlbState.freeRegsAndReleaseVars(pReNative);
8669
8670 /*
8671 * TlbDone:
8672 *
8673 * Commit the new RSP value.
8674 */
8675 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8676 }
8677#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8678
8679#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8680 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
8681#endif
8682 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8683 if (idxRegEffSp != idxRegRsp)
8684 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8685
8686    /* The value variable is implicitly flushed. */
8687 if (idxRegValue != UINT8_MAX)
8688 iemNativeVarRegisterRelease(pReNative, idxVarValue);
8689 iemNativeVarFreeLocal(pReNative, idxVarValue);
8690
8691 return off;
8692}
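/* Note: the push emitter above follows the TLB code layout shared by the stack
   emitters in this file: an inline TlbLookup path that stores straight through
   the host mapping on a hit, a TlbMiss path that saves volatile registers and
   calls the pfnFunction helper, and a TlbDone label where both paths meet before
   the (possibly delayed) RSP write-back. The pop emitters below mirror this. */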
8693
8694
8695
8696/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
8697#define IEM_MC_POP_GREG_U16(a_iGReg) \
8698 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
8699 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
8700#define IEM_MC_POP_GREG_U32(a_iGReg) \
8701 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
8702 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
8703#define IEM_MC_POP_GREG_U64(a_iGReg) \
8704 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
8705 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
8706
8707#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
8708 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
8709 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8710#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
8711 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
8712 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
8713
8714#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
8715 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
8716 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8717#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
8718 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
8719 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
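/* Illustrative example (not from the source): in a 64-bit flat context a POP
   into RBX would typically be generated via IEM_MC_FLAT64_POP_GREG_U64(X86_GREG_xBX),
   which per the define above becomes
       off = iemNativeEmitStackPopGReg(pReNative, off, X86_GREG_xBX,
                                       RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
                                       (uintptr_t)iemNativeHlpStackFlatFetchU64,
                                       pCallEntry->idxInstr);
   i.e. cbMem=8 and cBitsFlat=64 once decoded by iemNativeEmitStackPopGReg. */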
8720
8721
8722DECL_FORCE_INLINE_THROW(uint32_t)
8723iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
8724 uint8_t idxRegTmp)
8725{
8726 /* Use16BitSp: */
8727#ifdef RT_ARCH_AMD64
8728 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8729 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
8730 RT_NOREF(idxRegTmp);
8731#else
8732 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
8733 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
8734 /* add tmp, regrsp, #cbMem */
8735 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
8736 /* and tmp, tmp, #0xffff */
8737 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
8738 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
8739    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
8740 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
8741#endif
8742 return off;
8743}
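/* Worked example for the helper above (assuming cbMem=2): with SP=0xFFFE the
   emitted code puts the old 16-bit SP value 0xFFFE into idxRegEffSp as the
   address to read from, while the low 16 bits of idxRegRsp become
   (0xFFFE + 2) & 0xffff = 0x0000; bits 63:16 of RSP are left untouched, matching
   16-bit stack segment wrap-around. */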
8744
8745
8746DECL_FORCE_INLINE(uint32_t)
8747iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
8748{
8749 /* Use32BitSp: */
8750 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8751 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
8752 return off;
8753}
8754
8755
8756/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
8757DECL_INLINE_THROW(uint32_t)
8758iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
8759 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
8760{
8761 /*
8762 * Assert sanity.
8763 */
8764 Assert(idxGReg < 16);
8765#ifdef VBOX_STRICT
8766 if (RT_BYTE2(cBitsVarAndFlat) != 0)
8767 {
8768 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8769 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8770 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8771 Assert( pfnFunction
8772 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8773 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
8774 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8775 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
8776 : UINT64_C(0xc000b000a0009000) ));
8777 }
8778 else
8779 Assert( pfnFunction
8780 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
8781 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
8782 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
8783 : UINT64_C(0xc000b000a0009000) ));
8784#endif
8785
8786#ifdef VBOX_STRICT
8787 /*
8788 * Check that the fExec flags we've got make sense.
8789 */
8790 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8791#endif
8792
8793 /*
8794 * To keep things simple we have to commit any pending writes first as we
8795 * may end up making calls.
8796 */
8797 off = iemNativeRegFlushPendingWrites(pReNative, off);
8798
8799 /*
8800 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
8801 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
8802 * directly as the effective stack pointer.
8803 * (Code structure is very similar to that of PUSH)
8804 */
8805 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
8806 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
8807 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8808 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8809 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8810 /** @todo can do a better job picking the register here. For cbMem >= 4 this
8811 * will be the resulting register value. */
8812 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
8813
8814 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8815 if (cBitsFlat != 0)
8816 {
8817 Assert(idxRegEffSp == idxRegRsp);
8818 Assert(cBitsFlat == 32 || cBitsFlat == 64);
8819 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8820 }
8821 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8822 {
8823 Assert(idxRegEffSp != idxRegRsp);
8824 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8825 kIemNativeGstRegUse_ReadOnly);
8826#ifdef RT_ARCH_AMD64
8827 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8828#else
8829 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8830#endif
8831 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8832 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8833 offFixupJumpToUseOtherBitSp = off;
8834 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8835 {
8836/** @todo can skip idxRegRsp updating when popping ESP. */
8837 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8838 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8839 }
8840 else
8841 {
8842 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8843 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8844 }
8845 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8846 }
8847 /* SpUpdateEnd: */
8848 uint32_t const offLabelSpUpdateEnd = off;
8849
8850 /*
8851     * Okay, now prepare for the TLB lookup and jump to its code (or to the
8852     * TlbMiss code if we're skipping the lookup).
8853 */
8854 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8855 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
8856 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8857 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8858 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8859 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8860 : UINT32_MAX;
8861
8862 if (!TlbState.fSkip)
8863 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8864 else
8865 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8866
8867 /*
8868 * Use16BitSp:
8869 */
8870 if (cBitsFlat == 0)
8871 {
8872#ifdef RT_ARCH_AMD64
8873 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8874#else
8875 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8876#endif
8877 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8878 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8879 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8880 else
8881 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8882 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8883 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8884 }
8885
8886 /*
8887 * TlbMiss:
8888 *
8889     * Call helper to do the popping.
8890 */
8891 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8892
8893#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8894 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8895#else
8896 RT_NOREF(idxInstr);
8897#endif
8898
8899 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8900 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8901 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
8902 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8903
8904
8905 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
8906 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8907 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8908
8909#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8910 /* Do delayed EFLAGS calculations. */
8911 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
8912#endif
8913
8914 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8915 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8916
8917 /* Done setting up parameters, make the call. */
8918 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
8919
8920 /* Move the return register content to idxRegMemResult. */
8921 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8922 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8923
8924 /* Restore variables and guest shadow registers to volatile registers. */
8925 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8926 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8927
8928#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8929 if (!TlbState.fSkip)
8930 {
8931 /* end of TlbMiss - Jump to the done label. */
8932 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8933 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8934
8935 /*
8936 * TlbLookup:
8937 */
8938 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
8939 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8940
8941 /*
8942         * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
8943 */
8944 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8945# ifdef IEM_WITH_TLB_STATISTICS
8946 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8947 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8948# endif
8949 switch (cbMem)
8950 {
8951 case 2:
8952 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8953 break;
8954 case 4:
8955 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8956 break;
8957 case 8:
8958 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8959 break;
8960 default:
8961 AssertFailed();
8962 }
8963
8964 TlbState.freeRegsAndReleaseVars(pReNative);
8965
8966 /*
8967 * TlbDone:
8968 *
8969     * Set the new RSP value (FLAT accesses need to calculate it first) and
8970 * commit the popped register value.
8971 */
8972 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8973 }
8974#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8975
8976 if (idxGReg != X86_GREG_xSP)
8977 {
8978 /* Set the register. */
8979 if (cbMem >= sizeof(uint32_t))
8980 {
8981#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8982 AssertMsg( pReNative->idxCurCall == 0
8983 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8984 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8985 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8986#endif
8987 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8988#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8989 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8990#endif
8991#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8992 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8993 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8994#endif
8995 }
8996 else
8997 {
8998 Assert(cbMem == sizeof(uint16_t));
8999 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
9000 kIemNativeGstRegUse_ForUpdate);
9001 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
9002#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
9003 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
9004#endif
9005 iemNativeRegFreeTmp(pReNative, idxRegDst);
9006 }
9007
9008 /* Complete RSP calculation for FLAT mode. */
9009 if (idxRegEffSp == idxRegRsp)
9010 {
9011 if (cBitsFlat == 64)
9012 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
9013 else
9014 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
9015 }
9016 }
9017 else
9018 {
9019        /* We're popping RSP, ESP or SP. Only this requires a bit of extra work, of course. */
9020 if (cbMem == sizeof(uint64_t))
9021 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
9022 else if (cbMem == sizeof(uint32_t))
9023 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
9024 else
9025 {
9026 if (idxRegEffSp == idxRegRsp)
9027 {
9028 if (cBitsFlat == 64)
9029 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
9030 else
9031 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
9032 }
9033 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
9034 }
9035 }
9036
9037#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
9038 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
9039#endif
9040
9041 iemNativeRegFreeTmp(pReNative, idxRegRsp);
9042 if (idxRegEffSp != idxRegRsp)
9043 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
9044 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
9045
9046 return off;
9047}
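/* Note: the idxGReg == X86_GREG_xSP branch above handles popping into the stack
   pointer itself: for 64/32-bit operands the popped value simply becomes the new
   RSP/ESP (so no extra post-pop adjustment is emitted), while for 16-bit operands
   the popped word is merged into the low 16 bits of RSP, preserving the upper
   bits. */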
9048
9049
9050
9051/*********************************************************************************************************************************
9052* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
9053*********************************************************************************************************************************/
9054
9055#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9056 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
9057 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
9058 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
9059
9060#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9061 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
9062 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
9063 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
9064
9065#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9066 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
9067 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
9068 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
9069
9070#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9071 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
9072 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
9073 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
9074
9075
9076#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9077 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
9078 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9079 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
9080
9081#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9082 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
9083 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9084 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
9085
9086#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9087 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
9088 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9089 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
9090
9091#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9092 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
9093 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9094 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
9095
9096#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9097 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
9098 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9099 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
9100
9101
9102#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9103 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
9104 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9105 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
9106
9107#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9108 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
9109 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9110 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
9111
9112#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9113 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
9114 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9115 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
9116
9117#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9118 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
9119 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9120 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
9121
9122#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9123 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
9124 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9125 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
9126
9127
9128#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9129 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
9130 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9131 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
9132
9133#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9134 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
9135 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9136 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
9137#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9138 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
9139 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9140 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
9141
9142#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9143 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
9144 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9145 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
9146
9147#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9148 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
9149 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9150 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
9151
9152
9153#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9154 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
9155 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9156 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
9157
9158#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9159 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
9160 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
9161 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
9162
9163
9164#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9165 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9166 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9167 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
9168
9169#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9170 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9171 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9172 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
9173
9174#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9175 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9176 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9177 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
9178
9179#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9180 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9181 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9182 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
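/* Note: in all of the map MCs above the fAlignMaskAndCtl argument is simply the
   natural alignment mask (access size minus one, 0 for bytes); the VBOX_STRICT
   asserts in iemNativeEmitMemMapCommon below only check that it stays below the
   access size, and the value is forwarded as-is to iemNativeEmitTlbLookup. */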
9183
9184
9185
9186#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9187 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9188 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
9189 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
9190
9191#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9192 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9193 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
9194 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
9195
9196#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9197 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9198 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
9199 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
9200
9201#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9202 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9203 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
9204 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
9205
9206
9207#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9208 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9209 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9210 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
9211
9212#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9213 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9214 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9215 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
9216
9217#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9218 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9219 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9220 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9221
9222#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9223 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9224 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9225 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
9226
9227#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
9228 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
9229 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9230 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9231
9232
9233#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9234 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9235 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9236 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
9237
9238#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9239 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9240 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9241 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
9242
9243#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9244 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9245 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9246 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9247
9248#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9249 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9250 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9251 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
9252
9253#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
9254 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
9255 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9256 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9257
9258
9259#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9260 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9261 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9262 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
9263
9264#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9265 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9266 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9267 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
9268
9269#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9270 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9271 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9272 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9273
9274#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9275 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9276 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9277 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
9278
9279#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
9280 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
9281 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9282 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9283
9284
9285#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
9286 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
9287 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9288 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
9289
9290#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
9291 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
9292 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
9293 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
9294
9295
9296#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9297 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9298 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9299 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
9300
9301#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9302 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9303 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9304 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
9305
9306#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9307 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9308 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9309 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
9310
9311#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9312 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9313 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9314 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
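/* Note: the FLAT variants above differ from the segmented ones only in passing
   UINT8_MAX as iSegReg and in using the iemNativeHlpMemFlatMapDataXxx helpers;
   with iSegReg == UINT8_MAX the emitter below skips loading the segment register
   argument for the TLB-miss helper call. */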
9315
9316
9317DECL_INLINE_THROW(uint32_t)
9318iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
9319 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint32_t fAlignMaskAndCtl,
9320 uintptr_t pfnFunction, uint8_t idxInstr)
9321{
9322 /*
9323 * Assert sanity.
9324 */
9325 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
9326 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
9327 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
9328 && pVarMem->cbVar == sizeof(void *),
9329 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9330
9331 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9332 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9333 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
9334 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
9335 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9336
9337 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
9338 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
9339 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
9340 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
9341 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9342
9343 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
9344
9345 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
9346
9347#ifdef VBOX_STRICT
9348# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
9349 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
9350 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
9351 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
9352 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
9353# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
9354 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
9355 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
9356 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
9357
9358 if (iSegReg == UINT8_MAX)
9359 {
9360 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
9361 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
9362 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
9363 switch (cbMem)
9364 {
9365 case 1:
9366 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8));
9367 Assert(!fAlignMaskAndCtl);
9368 break;
9369 case 2:
9370 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16));
9371 Assert(fAlignMaskAndCtl < 2);
9372 break;
9373 case 4:
9374 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32));
9375 Assert(fAlignMaskAndCtl < 4);
9376 break;
9377 case 8:
9378 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64));
9379 Assert(fAlignMaskAndCtl < 8);
9380 break;
9381 case 10:
9382 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
9383 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
9384 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9385 Assert(fAlignMaskAndCtl < 8);
9386 break;
9387 case 16:
9388 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128));
9389 Assert(fAlignMaskAndCtl < 16);
9390 break;
9391# if 0
9392 case 32:
9393 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256));
9394 Assert(fAlignMaskAndCtl < 32);
9395 break;
9396 case 64:
9397 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512));
9398 Assert(fAlignMaskAndCtl < 64);
9399 break;
9400# endif
9401 default: AssertFailed(); break;
9402 }
9403 }
9404 else
9405 {
9406 Assert(iSegReg < 6);
9407 switch (cbMem)
9408 {
9409 case 1:
9410 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8));
9411 Assert(!fAlignMaskAndCtl);
9412 break;
9413 case 2:
9414 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16));
9415 Assert(fAlignMaskAndCtl < 2);
9416 break;
9417 case 4:
9418 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32));
9419 Assert(fAlignMaskAndCtl < 4);
9420 break;
9421 case 8:
9422 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64));
9423 Assert(fAlignMaskAndCtl < 8);
9424 break;
9425 case 10:
9426 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
9427 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
9428 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9429 Assert(fAlignMaskAndCtl < 8);
9430 break;
9431 case 16:
9432 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128));
9433 Assert(fAlignMaskAndCtl < 16);
9434 break;
9435# if 0
9436 case 32:
9437 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256));
9438 Assert(fAlignMaskAndCtl < 32);
9439 break;
9440 case 64:
9441 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512));
9442 Assert(fAlignMaskAndCtl < 64);
9443 break;
9444# endif
9445 default: AssertFailed(); break;
9446 }
9447 }
9448# undef IEM_MAP_HLP_FN
9449# undef IEM_MAP_HLP_FN_NO_AT
9450#endif
9451
9452#ifdef VBOX_STRICT
9453 /*
9454 * Check that the fExec flags we've got make sense.
9455 */
9456 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
9457#endif
9458
9459 /*
9460 * To keep things simple we have to commit any pending writes first as we
9461 * may end up making calls.
9462 */
9463 off = iemNativeRegFlushPendingWrites(pReNative, off);
9464
9465#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9466 /*
9467 * Move/spill/flush stuff out of call-volatile registers.
9468 * This is the easy way out. We could contain this to the tlb-miss branch
9469 * by saving and restoring active stuff here.
9470 */
9471 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
9472 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9473#endif
9474
9475 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
9476 while the tlb-miss codepath will temporarily put it on the stack.
9477       Set the type to stack here so we don't need to do it twice below. */
9478 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
9479 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
9480 /** @todo use a tmp register from TlbState, since they'll be free after tlb
9481 * lookup is done. */
9482
9483 /*
9484 * Define labels and allocate the result register (trying for the return
9485 * register if we can).
9486 */
9487 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
9488 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
9489 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
9490 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
9491 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
9492 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
9493 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
9494 : UINT32_MAX;
9495
9496 /*
9497 * Jump to the TLB lookup code.
9498 */
9499 if (!TlbState.fSkip)
9500 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
9501
9502 /*
9503 * TlbMiss:
9504 *
9505     * Call helper to do the mapping.
9506 * We flush all guest register shadow copies here.
9507 */
9508 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
9509
9510#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
9511 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9512#else
9513 RT_NOREF(idxInstr);
9514#endif
9515
9516#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9517 /* Save variables in volatile registers. */
9518 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
9519 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
9520#endif
9521
9522 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
9523    off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*offAddend*/,
9524#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9525 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
9526#else
9527 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9528#endif
9529
9530 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
9531 if (iSegReg != UINT8_MAX)
9532 {
9533 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
9534 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
9535 }
9536
9537#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
9538 /* Do delayed EFLAGS calculations. */
9539 if (iSegReg == UINT8_MAX)
9540 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
9541 fHstRegsNotToSave);
9542 else
9543 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
9544 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG)>(pReNative, off, &TlbState,
9545 fHstRegsNotToSave);
9546#endif
9547
9548 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
9549 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
9550 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
9551
9552 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9553 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9554
9555 /* Done setting up parameters, make the call. */
9556 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
9557
9558 /*
9559 * Put the output in the right registers.
9560 */
9561 Assert(idxRegMemResult == pVarMem->idxReg);
9562 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
9563 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
9564
9565#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9566 /* Restore variables and guest shadow registers to volatile registers. */
9567 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
9568 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
9569#endif
9570
9571 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
9572 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
9573
9574#ifdef IEMNATIVE_WITH_TLB_LOOKUP
9575 if (!TlbState.fSkip)
9576 {
9577        /* end of TlbMiss - Jump to the done label. */
9578 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
9579 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
9580
9581 /*
9582 * TlbLookup:
9583 */
9584 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl, fAccess,
9585 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
9586# ifdef IEM_WITH_TLB_STATISTICS
9587 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
9588 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
9589# endif
9590
9591 /* [idxVarUnmapInfo] = 0; */
9592 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
9593
9594 /*
9595 * TlbDone:
9596 */
9597 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
9598
9599 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
9600
9601# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9602 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
9603 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9604# endif
9605 }
9606#else
9607 RT_NOREF(fAccess, fAlignMaskAndCtl, idxLabelTlbMiss);
9608#endif
9609
9610 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9611 iemNativeVarRegisterRelease(pReNative, idxVarMem);
9612
9613 return off;
9614}
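/* Illustrative usage (variable names are placeholders, not taken from the
   source): a read-modify-write instruction body typically pairs a map MC with a
   commit MC:
       IEM_MC_MEM_MAP_U16_RW(pu16Dst, bUnmapInfo, iEffSeg, GCPtrEffDst);
       ... modify the operand through the returned host pointer pu16Dst ...
       IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
   The commit/unmap side is emitted by iemNativeEmitMemCommitAndUnmap below. */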
9615
9616
9617#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
9618 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
9619 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
9620
9621#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
9622 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
9623 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
9624
9625#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
9626 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
9627 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
9628
9629#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
9630 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
9631 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
9632
9633DECL_INLINE_THROW(uint32_t)
9634iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
9635 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
9636{
9637 /*
9638 * Assert sanity.
9639 */
9640 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9641#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
9642 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9643#endif
9644 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
9645 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
9646 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
9647#ifdef VBOX_STRICT
9648 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
9649 {
9650 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
9651 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
9652 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
9653 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
9654 case IEM_ACCESS_TYPE_WRITE:
9655 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
9656 case IEM_ACCESS_TYPE_READ:
9657 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
9658 default: AssertFailed();
9659 }
9660#else
9661 RT_NOREF(fAccess);
9662#endif
9663
9664 /*
9665 * To keep things simple we have to commit any pending writes first as we
9666 * may end up making calls (there shouldn't be any at this point, so this
9667 * is just for consistency).
9668 */
9669 /** @todo we could postpone this till we make the call and reload the
9670 * registers after returning from the call. Not sure if that's sensible or
9671 * not, though. */
9672 off = iemNativeRegFlushPendingWrites(pReNative, off);
9673
9674 /*
9675 * Move/spill/flush stuff out of call-volatile registers.
9676 *
9677 * We exclude any register holding the bUnmapInfo variable, as we'll be
9678 * checking it after returning from the call and will free it afterwards.
9679 */
9680 /** @todo save+restore active registers and maybe guest shadows in miss
9681 * scenario. */
9682 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
9683 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
9684
9685 /*
9686 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
9687 * to call the unmap helper function.
9688 *
9689     * The likelihood of it being zero is higher than for the TLB hit when doing
9690     * the mapping, as a TLB miss for a well-aligned and unproblematic memory
9691 * access should also end up with a mapping that won't need special unmapping.
9692 */
9693 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
9694 * should speed up things for the pure interpreter as well when TLBs
9695 * are enabled. */
9696#ifdef RT_ARCH_AMD64
9697 if (pVarUnmapInfo->idxReg == UINT8_MAX)
9698 {
9699 /* test byte [rbp - xxx], 0ffh */
9700 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9701 pbCodeBuf[off++] = 0xf6;
9702 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
9703 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9704 pbCodeBuf[off++] = 0xff;
9705 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9706 }
9707 else
9708#endif
9709 {
9710 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
9711 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
9712 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
9713 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9714 }
9715 uint32_t const offJmpFixup = off;
9716 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
9717
9718 /*
9719 * Call the unmap helper function.
9720 */
9721#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
9722 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9723#else
9724 RT_NOREF(idxInstr);
9725#endif
9726
9727 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
9728 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
9729 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9730
9731 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9732 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9733
9734 /* Done setting up parameters, make the call.
9735 Note! Since we can only end up here if we took a TLB miss, any postponed EFLAGS
9736       calculations have been done there already. Thus, a_fSkipEflChecks = true. */
9737 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
9738
 9739 /* The bUnmapInfo variable is implicitly freed by these MCs. */
9740 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
9741
9742 /*
9743 * Done, just fixup the jump for the non-call case.
9744 */
9745 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
9746
9747 return off;
9748}
9749
9750
9751
9752/*********************************************************************************************************************************
9753* State and Exceptions *
9754*********************************************************************************************************************************/
9755
9756#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9757#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9758
9759#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9760#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9761#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9762
9763#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9764#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9765#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9766
9767
9768DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
9769{
9770#ifndef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
9771 RT_NOREF(pReNative, fForChange);
9772#else
9773 if ( !(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED)
9774 && fForChange)
9775 {
9776# ifdef RT_ARCH_AMD64
9777
9778 /* Need to save the host MXCSR the first time, and clear the exception flags. */
9779 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9780 {
9781 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9782
9783 /* stmxcsr */
9784 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9785 pbCodeBuf[off++] = X86_OP_REX_B;
9786 pbCodeBuf[off++] = 0x0f;
9787 pbCodeBuf[off++] = 0xae;
9788 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9789 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9790 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9791 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9792 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9793 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9794
9795 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9796 }
9797
9798 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
9799 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
9800
9801 /*
 9802 * Mask all exceptions, clear the exception status flags and load the result
 9803 * into the host MXCSR, taking a detour through memory here because
 9804 * ldmxcsr/stmxcsr don't support a register source/target (sigh).
9805 */
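 /* Scalar equivalent of the masking done below (illustration only; the variable
    names are made up for this comment):
        uint32_t const uHostMxCsr = (uGuestMxCsr | X86_MXCSR_XCPT_MASK) & ~X86_MXCSR_XCPT_FLAGS;
  */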
9806 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr);
9807 off = iemNativeEmitOrGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, X86_MXCSR_XCPT_MASK);
9808 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, ~X86_MXCSR_XCPT_FLAGS);
9809 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9810
9811 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9812
9813 /* ldmxcsr */
9814 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9815 pbCodeBuf[off++] = X86_OP_REX_B;
9816 pbCodeBuf[off++] = 0x0f;
9817 pbCodeBuf[off++] = 0xae;
9818 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9819 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9820 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9821 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9822 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9823 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9824
9825 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9826 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9827
9828# elif defined(RT_ARCH_ARM64)
9829 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
9830
 9831 /* Need to save the host floating point control register the first time, and clear FPSR. */
9832 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9833 {
9834 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9835 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);
9836 pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9837 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9838 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9839 }
9840
9841 /*
9842 * Translate MXCSR to FPCR.
9843 *
9844 * Unfortunately we can't emulate the exact behavior of MXCSR as we can't take
 9845 * FEAT_AFP on arm64 for granted (my M2 MacBook doesn't have it). So we can't map
9846 * MXCSR.DAZ to FPCR.FIZ and MXCSR.FZ to FPCR.FZ with FPCR.AH being set.
9847 * We can only use FPCR.FZ which will flush inputs _and_ output de-normals to zero.
9848 */
9849 /** @todo Check the host supported flags (needs additional work to get the host features from CPUM)
9850 * and implement alternate handling if FEAT_AFP is present. */
9851 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
9852
9853 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
9854
 9855 /* First make sure that there is nothing set in the upper 16 bits (X86_MXCSR_MM, which we don't emulate right now). */
9856 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegTmp, idxRegMxCsr);
9857
9858 /* If either MXCSR.FZ or MXCSR.DAZ is set FPCR.FZ will be set. */
9859 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(IEMNATIVE_REG_FIXED_TMP0, idxRegTmp, X86_MXCSR_DAZ_BIT, 1);
9860 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, X86_MXCSR_FZ_BIT);
9861 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(idxRegTmp, idxRegTmp, IEMNATIVE_REG_FIXED_TMP0);
9862 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegTmp, idxRegTmp, ARMV8_FPCR_FZ_BIT);
9863
9864 /*
 9865 * Init the rounding mode; the layout differs between MXCSR.RC[14:13] and FPCR.RMode[23:22]:
9866 *
9867 * Value MXCSR FPCR
9868 * 0 RN RN
9869 * 1 R- R+
9870 * 2 R+ R-
9871 * 3 RZ RZ
9872 *
 9873 * Conversion can be achieved by swapping the two bit positions.
9874 */
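 /* Scalar sketch of that conversion (illustration only; the variable names are
    made up for this comment):
        uint32_t const uRc    = (uMxCsr >> X86_MXCSR_RC_SHIFT) & 3;  // MXCSR.RC
        uint32_t const uRMode = ((uRc & 1) << 1) | (uRc >> 1);       // swap the two bits
        // uRMode is then inserted into the FPCR.RMode field of the value written below.
  */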
9875 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT);
9876 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 14, 1);
9877 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT + 1);
9878 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 13, 1);
9879
9880 /* Write the value to FPCR. */
9881 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9882
9883 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9884 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9885 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9886# else
9887# error "Port me"
9888# endif
9889 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
9890 }
9891#endif
9892 return off;
9893}
9894
9895
9896
9897/*********************************************************************************************************************************
9898* Emitters for FPU related operations. *
9899*********************************************************************************************************************************/
9900
9901#define IEM_MC_FETCH_FCW(a_u16Fcw) \
9902 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
9903
9904/** Emits code for IEM_MC_FETCH_FCW. */
9905DECL_INLINE_THROW(uint32_t)
9906iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9907{
9908 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9909 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9910
9911 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9912
9913 /* Allocate a temporary FCW register. */
9914 /** @todo eliminate extra register */
9915 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
9916 kIemNativeGstRegUse_ReadOnly);
9917
9918 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
9919
9920 /* Free but don't flush the FCW register. */
9921 iemNativeRegFreeTmp(pReNative, idxFcwReg);
9922 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9923
9924 return off;
9925}
9926
9927
9928#define IEM_MC_FETCH_FSW(a_u16Fsw) \
9929 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
9930
9931/** Emits code for IEM_MC_FETCH_FSW. */
9932DECL_INLINE_THROW(uint32_t)
9933iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9934{
9935 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9936 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9937
9938 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
9939 /* Allocate a temporary FSW register. */
9940 /** @todo eliminate extra register */
9941 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
9942 kIemNativeGstRegUse_ReadOnly);
9943
9944 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
9945
9946 /* Free but don't flush the FSW register. */
9947 iemNativeRegFreeTmp(pReNative, idxFswReg);
9948 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9949
9950 return off;
9951}
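/* For reference, an fnstsw-ax style MC block using the above looks roughly like
   this (simplified and illustrative only, not lifted from the decoder):
       IEM_MC_LOCAL(uint16_t, u16Fsw);
       IEM_MC_FETCH_FSW(u16Fsw);
       IEM_MC_STORE_GREG_U16(X86_GREG_xAX, u16Fsw);
   which the recompiler translates via the emitter above. */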
9952
9953
9954
9955#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9956
9957
9958/*********************************************************************************************************************************
9959* Emitters for SSE/AVX specific operations. *
9960*********************************************************************************************************************************/
9961
9962#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
9963 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
9964
9965/** Emits code for IEM_MC_COPY_XREG_U128. */
9966DECL_INLINE_THROW(uint32_t)
9967iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
9968{
9969 /* This is a nop if the source and destination register are the same. */
9970 if (iXRegDst != iXRegSrc)
9971 {
9972 /* Allocate destination and source register. */
9973 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
9974 kIemNativeGstSimdRegLdStSz_Low128,
9975 kIemNativeGstRegUse_ForFullWrite);
9976 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
9977 kIemNativeGstSimdRegLdStSz_Low128,
9978 kIemNativeGstRegUse_ReadOnly);
9979
9980 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9981
9982 /* Free but don't flush the source and destination register. */
9983 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9984 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9985 }
9986
9987 return off;
9988}
9989
9990
9991#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
9992 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
9993
9994/** Emits code for IEM_MC_FETCH_XREG_U128. */
9995DECL_INLINE_THROW(uint32_t)
9996iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
9997{
9998 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9999 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
10000
10001 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10002 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
10003
10004 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10005
10006 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10007
10008 /* Free but don't flush the source register. */
10009 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10010 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10011
10012 return off;
10013}
10014
10015
10016#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
10017 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
10018
10019#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
10020 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
10021
 10022 /** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
10023DECL_INLINE_THROW(uint32_t)
10024iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
10025{
10026 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10027 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10028
10029 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10030 kIemNativeGstSimdRegLdStSz_Low128,
10031 kIemNativeGstRegUse_ReadOnly);
10032
10033 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10034 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10035
10036 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
10037
10038 /* Free but don't flush the source register. */
10039 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10040 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10041
10042 return off;
10043}
10044
10045
10046#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
10047 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
10048
10049#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
10050 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
10051
10052/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
10053DECL_INLINE_THROW(uint32_t)
10054iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
10055{
10056 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10057 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10058
10059 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10060 kIemNativeGstSimdRegLdStSz_Low128,
10061 kIemNativeGstRegUse_ReadOnly);
10062
10063 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10064 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10065
10066 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
10067
10068 /* Free but don't flush the source register. */
10069 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10070 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10071
10072 return off;
10073}
10074
10075
10076#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
10077 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
10078
10079/** Emits code for IEM_MC_FETCH_XREG_U16. */
10080DECL_INLINE_THROW(uint32_t)
10081iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
10082{
10083 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10084 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
10085
10086 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10087 kIemNativeGstSimdRegLdStSz_Low128,
10088 kIemNativeGstRegUse_ReadOnly);
10089
10090 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10091 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10092
10093 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
10094
10095 /* Free but don't flush the source register. */
10096 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10097 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10098
10099 return off;
10100}
10101
10102
10103#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
10104 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
10105
10106/** Emits code for IEM_MC_FETCH_XREG_U8. */
10107DECL_INLINE_THROW(uint32_t)
10108iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
10109{
10110 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10111 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
10112
10113 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10114 kIemNativeGstSimdRegLdStSz_Low128,
10115 kIemNativeGstRegUse_ReadOnly);
10116
10117 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10118 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10119
10120 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
10121
10122 /* Free but don't flush the source register. */
10123 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10124 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10125
10126 return off;
10127}
10128
10129
10130#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
10131 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
10132
10133AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
10134#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
10135 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
10136
10137
10138/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
10139DECL_INLINE_THROW(uint32_t)
10140iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10141{
10142 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10143 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10144
10145 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10146 kIemNativeGstSimdRegLdStSz_Low128,
10147 kIemNativeGstRegUse_ForFullWrite);
10148 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10149
10150 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10151
10152 /* Free but don't flush the source register. */
10153 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10154 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10155
10156 return off;
10157}
10158
10159
10160#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
10161 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
10162
10163#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
10164 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
10165
10166#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
10167 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
10168
10169#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
10170 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
10171
10172#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
10173 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
10174
10175#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
10176 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
10177
 10178 /** Emits code for IEM_MC_STORE_XREG_U64/U32/U16/U8 and IEM_MC_STORE_XREG_R64/R32. */
10179DECL_INLINE_THROW(uint32_t)
10180iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
10181 uint8_t cbLocal, uint8_t iElem)
10182{
10183 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10184 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
10185
10186#ifdef VBOX_STRICT
10187 switch (cbLocal)
10188 {
10189 case sizeof(uint64_t): Assert(iElem < 2); break;
10190 case sizeof(uint32_t): Assert(iElem < 4); break;
10191 case sizeof(uint16_t): Assert(iElem < 8); break;
10192 case sizeof(uint8_t): Assert(iElem < 16); break;
10193 default: AssertFailed();
10194 }
10195#endif
10196
10197 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10198 kIemNativeGstSimdRegLdStSz_Low128,
10199 kIemNativeGstRegUse_ForUpdate);
10200 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10201
10202 switch (cbLocal)
10203 {
10204 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10205 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10206 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10207 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10208 default: AssertFailed();
10209 }
10210
10211 /* Free but don't flush the source register. */
10212 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10213 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10214
10215 return off;
10216}
10217
10218
10219#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
10220 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
10221
10222/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
10223DECL_INLINE_THROW(uint32_t)
10224iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10225{
10226 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10227 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10228
10229 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10230 kIemNativeGstSimdRegLdStSz_Low128,
10231 kIemNativeGstRegUse_ForUpdate);
10232 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10233
 10234 /* Zero the vector register first, then store the 64-bit value to the low 64 bits. */
10235 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10236 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10237
10238 /* Free but don't flush the source register. */
10239 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10240 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10241
10242 return off;
10243}
10244
10245
10246#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
10247 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
10248
10249/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
10250DECL_INLINE_THROW(uint32_t)
10251iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10252{
10253 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10254 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10255
10256 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10257 kIemNativeGstSimdRegLdStSz_Low128,
10258 kIemNativeGstRegUse_ForUpdate);
10259 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10260
10261 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
10262 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10263 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10264
10265 /* Free but don't flush the source register. */
10266 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10267 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10268
10269 return off;
10270}
10271
10272
10273#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
10274 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
10275
10276/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
10277DECL_INLINE_THROW(uint32_t)
10278iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
10279 uint8_t idxSrcVar, uint8_t iDwSrc)
10280{
10281 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10282 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10283
10284 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10285 kIemNativeGstSimdRegLdStSz_Low128,
10286 kIemNativeGstRegUse_ForUpdate);
10287 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10288
10289 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
10290 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
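 /* I.e. (illustration): XMM[iXReg].au32[iDwDst] = u128Value.au32[iDwSrc]; the
    other dwords of the destination are preserved (ForUpdate allocation). */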
10291
10292 /* Free but don't flush the destination register. */
10293 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10294 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10295
10296 return off;
10297}
10298
10299
10300#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10301 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10302
10303/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
10304DECL_INLINE_THROW(uint32_t)
10305iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10306{
10307 /*
10308 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10309 * if iYRegDst gets allocated first for the full write it won't load the
 10310 * actual value from CPUMCTX. When iYRegSrc is allocated afterwards, it gets
 10311 * duplicated from the host register already allocated for iYRegDst, which
 10312 * contains garbage. This is caught by the guest register value checking in
 10313 * debug builds.
10314 */
10315 if (iYRegDst != iYRegSrc)
10316 {
10317 /* Allocate destination and source register. */
10318 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10319 kIemNativeGstSimdRegLdStSz_256,
10320 kIemNativeGstRegUse_ForFullWrite);
10321 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10322 kIemNativeGstSimdRegLdStSz_Low128,
10323 kIemNativeGstRegUse_ReadOnly);
10324
10325 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10326 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10327
10328 /* Free but don't flush the source and destination register. */
10329 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10330 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10331 }
10332 else
10333 {
 10334 /* This effectively only clears the upper 128 bits of the register. */
10335 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10336 kIemNativeGstSimdRegLdStSz_High128,
10337 kIemNativeGstRegUse_ForFullWrite);
10338
10339 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10340
10341 /* Free but don't flush the destination register. */
10342 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10343 }
10344
10345 return off;
10346}
10347
10348
10349#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10350 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10351
10352/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
10353DECL_INLINE_THROW(uint32_t)
10354iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10355{
10356 /*
10357 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10358 * if iYRegDst gets allocated first for the full write it won't load the
 10359 * actual value from CPUMCTX. When iYRegSrc is allocated afterwards, it gets
 10360 * duplicated from the host register already allocated for iYRegDst, which
 10361 * contains garbage. This is caught by the guest register value checking in debug
 10362 * builds. With iYRegSrc == iYRegDst the operation would effectively only clear
 10363 * the upper 256 bits of a zmm register, which we don't support yet, so it is just a nop.
10364 */
10365 if (iYRegDst != iYRegSrc)
10366 {
10367 /* Allocate destination and source register. */
10368 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10369 kIemNativeGstSimdRegLdStSz_256,
10370 kIemNativeGstRegUse_ReadOnly);
10371 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10372 kIemNativeGstSimdRegLdStSz_256,
10373 kIemNativeGstRegUse_ForFullWrite);
10374
10375 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10376
10377 /* Free but don't flush the source and destination register. */
10378 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10379 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10380 }
10381
10382 return off;
10383}
10384
10385
10386#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
10387 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
10388
10389/** Emits code for IEM_MC_FETCH_YREG_U128. */
10390DECL_INLINE_THROW(uint32_t)
10391iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
10392{
10393 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10394 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
10395
10396 Assert(iDQWord <= 1);
10397 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10398 iDQWord == 1
10399 ? kIemNativeGstSimdRegLdStSz_High128
10400 : kIemNativeGstSimdRegLdStSz_Low128,
10401 kIemNativeGstRegUse_ReadOnly);
10402
10403 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10404 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10405
10406 if (iDQWord == 1)
10407 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10408 else
10409 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10410
10411 /* Free but don't flush the source register. */
10412 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10413 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10414
10415 return off;
10416}
10417
10418
10419#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
10420 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
10421
10422/** Emits code for IEM_MC_FETCH_YREG_U64. */
10423DECL_INLINE_THROW(uint32_t)
10424iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
10425{
10426 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10427 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10428
10429 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10430 iQWord >= 2
10431 ? kIemNativeGstSimdRegLdStSz_High128
10432 : kIemNativeGstSimdRegLdStSz_Low128,
10433 kIemNativeGstRegUse_ReadOnly);
10434
10435 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10436 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10437
10438 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
10439
10440 /* Free but don't flush the source register. */
10441 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10442 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10443
10444 return off;
10445}
10446
10447
10448#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
10449 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
10450
10451/** Emits code for IEM_MC_FETCH_YREG_U32. */
10452DECL_INLINE_THROW(uint32_t)
10453iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
10454{
10455 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10456 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10457
10458 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10459 iDWord >= 4
10460 ? kIemNativeGstSimdRegLdStSz_High128
10461 : kIemNativeGstSimdRegLdStSz_Low128,
10462 kIemNativeGstRegUse_ReadOnly);
10463
10464 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10465 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10466
10467 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
10468
10469 /* Free but don't flush the source register. */
10470 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10471 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10472
10473 return off;
10474}
10475
10476
10477#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
10478 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
10479
10480/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
10481DECL_INLINE_THROW(uint32_t)
10482iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10483{
10484 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10485 kIemNativeGstSimdRegLdStSz_High128,
10486 kIemNativeGstRegUse_ForFullWrite);
10487
10488 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10489
10490 /* Free but don't flush the register. */
10491 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10492
10493 return off;
10494}
10495
10496
10497#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
10498 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
10499
10500/** Emits code for IEM_MC_STORE_YREG_U128. */
10501DECL_INLINE_THROW(uint32_t)
10502iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
10503{
10504 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10505 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10506
10507 Assert(iDQword <= 1);
10508 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10509 iDQword == 0
10510 ? kIemNativeGstSimdRegLdStSz_Low128
10511 : kIemNativeGstSimdRegLdStSz_High128,
10512 kIemNativeGstRegUse_ForFullWrite);
10513
10514 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10515
10516 if (iDQword == 0)
10517 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10518 else
10519 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
10520
10521 /* Free but don't flush the source register. */
10522 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10523 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10524
10525 return off;
10526}
10527
10528
10529#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10530 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10531
10532/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
10533DECL_INLINE_THROW(uint32_t)
10534iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10535{
10536 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10537 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10538
10539 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10540 kIemNativeGstSimdRegLdStSz_256,
10541 kIemNativeGstRegUse_ForFullWrite);
10542
10543 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10544
10545 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10546 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10547
10548 /* Free but don't flush the source register. */
10549 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10550 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10551
10552 return off;
10553}
10554
10555
10556#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
10557 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
10558
10559/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
10560DECL_INLINE_THROW(uint32_t)
10561iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10562{
10563 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10564 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10565
10566 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10567 kIemNativeGstSimdRegLdStSz_256,
10568 kIemNativeGstRegUse_ForFullWrite);
10569
10570 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10571
10572 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10573 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10574
10575 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10576 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10577
10578 return off;
10579}
10580
10581
10582#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
10583 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
10584
10585/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
10586DECL_INLINE_THROW(uint32_t)
10587iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10588{
10589 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10590 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10591
10592 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10593 kIemNativeGstSimdRegLdStSz_256,
10594 kIemNativeGstRegUse_ForFullWrite);
10595
10596 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10597
10598 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10599 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10600
10601 /* Free but don't flush the source register. */
10602 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10603 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10604
10605 return off;
10606}
10607
10608
10609#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
10610 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
10611
10612/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
10613DECL_INLINE_THROW(uint32_t)
10614iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10615{
10616 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10617 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10618
10619 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10620 kIemNativeGstSimdRegLdStSz_256,
10621 kIemNativeGstRegUse_ForFullWrite);
10622
10623 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10624
10625 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10626 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
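 /* I.e. (illustration): XMM[iXReg].au32[0..3] = u32Src and YMM[iXReg][255:128] = 0. */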
10627
10628 /* Free but don't flush the source register. */
10629 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10630 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10631
10632 return off;
10633}
10634
10635
10636#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
10637 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
10638
10639/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
10640DECL_INLINE_THROW(uint32_t)
10641iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10642{
10643 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10644 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10645
10646 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10647 kIemNativeGstSimdRegLdStSz_256,
10648 kIemNativeGstRegUse_ForFullWrite);
10649
10650 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10651
10652 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10653 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10654
10655 /* Free but don't flush the source register. */
10656 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10657 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10658
10659 return off;
10660}
10661
10662
10663#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
10664 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
10665
10666/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
10667DECL_INLINE_THROW(uint32_t)
10668iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10669{
10670 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10671 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10672
10673 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10674 kIemNativeGstSimdRegLdStSz_256,
10675 kIemNativeGstRegUse_ForFullWrite);
10676
10677 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10678
10679 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10680
10681 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10682 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10683
10684 return off;
10685}
10686
10687
10688#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
10689 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
10690
10691/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
10692DECL_INLINE_THROW(uint32_t)
10693iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10694{
10695 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10696 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10697
10698 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10699 kIemNativeGstSimdRegLdStSz_256,
10700 kIemNativeGstRegUse_ForFullWrite);
10701
10702 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10703
10704 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10705
10706 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10707 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10708
10709 return off;
10710}
10711
10712
10713#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10714 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10715
10716/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
10717DECL_INLINE_THROW(uint32_t)
10718iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10719{
10720 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10721 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10722
10723 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10724 kIemNativeGstSimdRegLdStSz_256,
10725 kIemNativeGstRegUse_ForFullWrite);
10726
10727 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10728
10729 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10730
10731 /* Free but don't flush the source register. */
10732 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10733 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10734
10735 return off;
10736}
10737
10738
10739#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10740 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10741
10742/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
10743DECL_INLINE_THROW(uint32_t)
10744iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10745{
10746 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10747 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10748
10749 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10750 kIemNativeGstSimdRegLdStSz_256,
10751 kIemNativeGstRegUse_ForFullWrite);
10752
10753 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10754
10755 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10756
10757 /* Free but don't flush the source register. */
10758 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10759 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10760
10761 return off;
10762}
10763
10764
10765#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10766 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10767
10768/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
10769DECL_INLINE_THROW(uint32_t)
10770iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10771{
10772 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10773 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10774
10775 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10776 kIemNativeGstSimdRegLdStSz_256,
10777 kIemNativeGstRegUse_ForFullWrite);
10778
10779 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10780
10781 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
10782
10783 /* Free but don't flush the source register. */
10784 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10785 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10786
10787 return off;
10788}
10789
10790
10791#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10792 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10793
10794/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
10795DECL_INLINE_THROW(uint32_t)
10796iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10797{
10798 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10799 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10800
10801 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10802 kIemNativeGstSimdRegLdStSz_256,
10803 kIemNativeGstRegUse_ForFullWrite);
10804
10805 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10806
10807 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10808 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
10809
10810 /* Free but don't flush the source register. */
10811 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10812 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10813
10814 return off;
10815}
10816
10817
10818#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10819 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10820
10821/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
10822DECL_INLINE_THROW(uint32_t)
10823iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10824{
10825 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10826 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10827
10828 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10829 kIemNativeGstSimdRegLdStSz_256,
10830 kIemNativeGstRegUse_ForFullWrite);
10831
10832 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10833
10834 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10835 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10836
10837 /* Free but don't flush the source register. */
10838 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10839 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10840
10841 return off;
10842}
10843
10844
10845#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
10846 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
10847
10848/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
10849DECL_INLINE_THROW(uint32_t)
10850iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
10851{
10852 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10853 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10854
10855 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10856 kIemNativeGstSimdRegLdStSz_256,
10857 kIemNativeGstRegUse_ForFullWrite);
10858 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10859 kIemNativeGstSimdRegLdStSz_Low128,
10860 kIemNativeGstRegUse_ReadOnly);
10861 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10862
10863 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10864 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10865 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
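 /* Resulting layout (illustration):
        YMM[iYRegDst][ 63:  0] = u64Local
        YMM[iYRegDst][127: 64] = YMM[iYRegSrcHx][127:64]
        YMM[iYRegDst][255:128] = 0 */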
10866
10867 /* Free but don't flush the source and destination registers. */
10868 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10869 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10870 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10871
10872 return off;
10873}
10874
10875
10876#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
10877 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
10878
10879/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
10880DECL_INLINE_THROW(uint32_t)
10881iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
10882{
10883 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10884 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10885
10886 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10887 kIemNativeGstSimdRegLdStSz_256,
10888 kIemNativeGstRegUse_ForFullWrite);
10889 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10890 kIemNativeGstSimdRegLdStSz_Low128,
10891 kIemNativeGstRegUse_ReadOnly);
10892 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10893
10894 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10895 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
10896 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
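 /* Resulting layout (illustration):
        YMM[iYRegDst][ 63:  0] = YMM[iYRegSrcHx][63:0]
        YMM[iYRegDst][127: 64] = u64Local
        YMM[iYRegDst][255:128] = 0 */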
10897
10898 /* Free but don't flush the source and destination registers. */
10899 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10900 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10901 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10902
10903 return off;
10904}
10905
10906
10907#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
10908 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
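/* Example (illustration only): a_bMask = 0x3 clears dwords 0 and 1, i.e. the low
   64 bits of XMM[a_iXReg]; a_bMask = 0xf clears the whole 128-bit register. */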
10909
10910
10911/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
10912DECL_INLINE_THROW(uint32_t)
10913iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
10914{
10915 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10916 kIemNativeGstSimdRegLdStSz_Low128,
10917 kIemNativeGstRegUse_ForUpdate);
10918
10919 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
10920 if (bImm8Mask & RT_BIT(0))
10921 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
10922 if (bImm8Mask & RT_BIT(1))
10923 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
10924 if (bImm8Mask & RT_BIT(2))
10925 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
10926 if (bImm8Mask & RT_BIT(3))
10927 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
10928
10929 /* Free but don't flush the destination register. */
10930 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10931
10932 return off;
10933}
10934
10935
10936#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
10937 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
10938
10939#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
10940 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
10941
10942/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
10943DECL_INLINE_THROW(uint32_t)
10944iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
10945{
10946 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10947 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
10948
10949 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10950 kIemNativeGstSimdRegLdStSz_256,
10951 kIemNativeGstRegUse_ReadOnly);
10952 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10953
10954 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
10955
10956 /* Free but don't flush the source register. */
10957 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10958 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10959
10960 return off;
10961}
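/*
 * Typical use in an MC block looks roughly like this (the local variable name
 * is illustrative only):
 *
 *     IEM_MC_LOCAL(RTUINT256U, uSrc);
 *     IEM_MC_FETCH_YREG_U256(uSrc, IEM_GET_MODRM_REG(pVCpu, bRm));
 *
 * i.e. the destination is a 256-bit local variable and the source is a YMM
 * register index.
 */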
10962
10963
10964#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
10965 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
10966
10967#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
10968 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
10969
10970/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
10971DECL_INLINE_THROW(uint32_t)
10972iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
10973{
10974 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10975 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10976
10977 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10978 kIemNativeGstSimdRegLdStSz_256,
10979 kIemNativeGstRegUse_ForFullWrite);
10980 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10981
10982 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
10983
10984 /* Free but don't flush the destination register. */
10985 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10986 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10987
10988 return off;
10989}
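/*
 * Note (sketch only): the destination is allocated for a full 256-bit write
 * and the whole 256-bit variable is copied, so bits 255:128 are covered by the
 * copy itself and no separate high-half zeroing is needed for the ZX_VLMAX
 * semantics here.
 */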
10990
10991
10992#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
10993 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
10994
10995
10996/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
10997DECL_INLINE_THROW(uint32_t)
10998iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
10999 uint8_t idxSrcVar, uint8_t iDwSrc)
11000{
11001 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
11002 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
11003
11004 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
11005 iDwDst < 4
11006 ? kIemNativeGstSimdRegLdStSz_Low128
11007 : kIemNativeGstSimdRegLdStSz_High128,
11008 kIemNativeGstRegUse_ForUpdate);
11009 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
11010 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
11011
11012 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
11013 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
11014
11015 /* Free but don't flush the destination register. */
11016 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
11017 iemNativeRegFreeTmp(pReNative, idxRegTmp);
11018 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
11019
11020 return off;
11021}
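/*
 * Illustrative sketch (pseudo-notation, not emitted code): dword indices 0..3
 * address the low XMM half and 4..7 the YMM high half, which is why only the
 * affected 128-bit half is loaded for update above.  The qword variant below
 * follows the same pattern with indices 0..1 and 2..3.  Net effect:
 *
 *     YMM[iYRegDst].au32[iDwDst] = u256Value.au32[iDwSrc];
 */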
11022
11023
11024#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
11025 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
11026
11027
11028/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
11029DECL_INLINE_THROW(uint32_t)
11030iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
11031 uint8_t idxSrcVar, uint8_t iQwSrc)
11032{
11033 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
11034 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
11035
11036 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
11037 iQwDst < 2
11038 ? kIemNativeGstSimdRegLdStSz_Low128
11039 : kIemNativeGstSimdRegLdStSz_High128,
11040 kIemNativeGstRegUse_ForUpdate);
11041 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
11042 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
11043
11044 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
11045 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
11046
11047 /* Free but don't flush the destination register. */
11048 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
11049 iemNativeRegFreeTmp(pReNative, idxRegTmp);
11050 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
11051
11052 return off;
11053}
11054
11055
11056#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
11057 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
11058
11059
11060/** Emits code for IEM_MC_STORE_YREG_U64. */
11061DECL_INLINE_THROW(uint32_t)
11062iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
11063{
11064 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
11065 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
11066
11067 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
11068 iQwDst < 2
11069 ? kIemNativeGstSimdRegLdStSz_Low128
11070 : kIemNativeGstSimdRegLdStSz_High128,
11071 kIemNativeGstRegUse_ForUpdate);
11072
11073 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
11074
11075 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
11076
11077 /* Free but don't flush the destination register. */
11078 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
11079 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
11080
11081 return off;
11082}
11083
11084
11085#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
11086 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
11087
11088/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
11089DECL_INLINE_THROW(uint32_t)
11090iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
11091{
11092 RT_NOREF(pReNative, iYReg);
11093 /** @todo Needs to be implemented when support for AVX-512 is added. */
11094 return off;
11095}
11096
11097
11098
11099/*********************************************************************************************************************************
11100* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
11101*********************************************************************************************************************************/
11102
11103/**
11104 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
11105 */
11106DECL_INLINE_THROW(uint32_t)
11107iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs, uint8_t idxInstr)
11108{
11109 /* Grab the MXCSR register; it must not be call-volatile, or we would end up freeing it when setting up the call below. */
11110 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
11111 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
11112 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
11113
11114#if 0 /* This is not required right now as the called helper will set up the SSE/AVX state if it is an assembly one. */
11115 /*
11116 * Need to do the FPU preparation.
11117 */
11118 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
11119#endif
11120
11121 /*
11122 * Do all the call setup and cleanup.
11123 */
11124 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
11125 false /*fFlushPendingWrites*/);
11126
11127 /*
11128 * Load the MXCSR register into the first argument and mask out the current exception flags.
11129 */
11130 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
11131 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
11132
11133 /*
11134 * Make the call.
11135 */
11136 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnAImpl);
11137
11138 /*
11139 * The updated MXCSR is in the return register; update the exception status flags.
11140 *
11141 * The return register is marked allocated as a temporary because it is required for the
11142 * exception generation check below.
11143 */
11144 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG)));
11145 uint8_t const idxRegTmp = iemNativeRegMarkAllocated(pReNative, IEMNATIVE_CALL_RET_GREG, kIemNativeWhat_Tmp);
11146 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
11147
11148#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
11149 /* Write back the MXCSR register value (there is no delayed writeback for such registers at the moment). */
11150 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
11151#endif
11152
11153 /*
11154 * Make sure we don't have any outstanding guest register writes as we may
11155 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
11156 */
11157 off = iemNativeRegFlushPendingWrites(pReNative, off);
11158
11159#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11160 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11161#else
11162 RT_NOREF(idxInstr);
11163#endif
11164
11165 /** @todo r=aeichner ANDN from BMI1 would save us a temporary and an additional instruction here, but I don't
11166 * want to assume the existence of this instruction at the moment. */
11167 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
11168
11169 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp2, idxRegTmp);
11170 /* tmp &= X86_MXCSR_XCPT_MASK */
11171 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
11172 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
11173 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
11174 /* tmp = ~tmp */
11175 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
11176 /* tmp &= mxcsr */
11177 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegTmp2);
11178 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseSseAvxFpRelated>(pReNative, off, idxRegTmp,
11179 X86_MXCSR_XCPT_FLAGS);
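 /*
 * As a plain C sketch (not emitted code), the sequence above computes
 *
 *     (uMxCsrRet & X86_MXCSR_XCPT_FLAGS) & ~((uMxCsrRet & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT)
 *
 * i.e. the exception flags raised by the helper that are not masked in MXCSR;
 * if any such bit is set, the TB is exited to raise the SSE/AVX floating-point
 * related exception.
 */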
11180
11181 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
11182 iemNativeRegFreeTmp(pReNative, idxRegTmp);
11183 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
11184
11185 return off;
11186}
11187
11188
11189#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
11190 off = iemNativeEmitCallSseAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
11191
11192/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
11193DECL_INLINE_THROW(uint32_t)
11194iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
11195{
11196 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11197 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11198 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
11199}
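/*
 * Hypothetical usage sketch (the helper name is invented for illustration): an
 * MC block for a two-operand SSE instruction declares its two argument
 * variables with IEM_MC_ARG and then invokes
 *
 *     IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_somehelper_u128, pDst, pSrc);
 *
 * The hidden first argument (the current MXCSR value with the exception flags
 * cleared) is supplied by the common worker above, which also merges the
 * returned MXCSR back and checks for unmasked exceptions.
 */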
11200
11201
11202#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
11203 off = iemNativeEmitCallSseAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
11204
11205/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
11206DECL_INLINE_THROW(uint32_t)
11207iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
11208 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
11209{
11210 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11211 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11212 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11213 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
11214}
11215
11216
11217/*********************************************************************************************************************************
11218* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
11219*********************************************************************************************************************************/
11220
11221#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
11222 off = iemNativeEmitCallAvxAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
11223
11224/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
11225DECL_INLINE_THROW(uint32_t)
11226iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
11227{
11228 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11229 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11230 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
11231}
11232
11233
11234#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
11235 off = iemNativeEmitCallAvxAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
11236
11237/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
11238DECL_INLINE_THROW(uint32_t)
11239iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
11240 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
11241{
11242 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11243 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11244 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11245 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
11246}
11247
11248
11249#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
11250
11251
11252/*********************************************************************************************************************************
11253* Include instruction emitters. *
11254*********************************************************************************************************************************/
11255#include "target-x86/IEMAllN8veEmit-x86.h"
11256