VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@ 107631

Last change on this file since 107631 was 107200, checked in by vboxsync, 7 weeks ago

VMM/IEM: Deal with hidden pointer to VBOXSTRICTRC return struct on win.arm64. jiraref:VBP-1466

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 540.3 KB
Line 
1/* $Id: IEMAllN8veRecompFuncs.h 107200 2024-11-29 22:15:46Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62#include "target-x86/IEMAllN8veEmit-x86.h"
63
64
65/*
66 * Narrow down the configs here to avoid wasting time on unused ones.
67 * Note! Same checks in IEMAllThrdRecompiler.cpp.
68 */
69
70#ifndef IEM_WITH_CODE_TLB
71# error The code TLB must be enabled for the recompiler.
72#endif
73
74#ifndef IEM_WITH_DATA_TLB
75# error The data TLB must be enabled for the recompiler.
76#endif
77
78#ifndef IEM_WITH_SETJMP
79# error The setjmp approach must be enabled for the recompiler.
80#endif
81
82
83/*********************************************************************************************************************************
84* Code emitters for flushing pending guest register writes and sanity checks *
85*********************************************************************************************************************************/
86
87#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
88
89# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
90/**
91 * Updates IEMCPU::uPcUpdatingDebug.
92 */
93DECL_INLINE_THROW(uint32_t) iemNativeEmitPcDebugAdd(PIEMRECOMPILERSTATE pReNative, uint32_t off, int64_t offDisp, uint8_t cBits)
94{
95# ifdef RT_ARCH_AMD64
96 if (pReNative->Core.fDebugPcInitialized && cBits >= 32)
97 {
98 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
99 if ((int32_t)offDisp == offDisp || cBits != 64)
100 {
101 /* add [q]word [pVCpu->iem.s.uPcUpdatingDebug], imm32/imm8 */
102 if (cBits == 64)
103 pCodeBuf[off++] = X86_OP_REX_W;
104 pCodeBuf[off++] = (int8_t)offDisp == offDisp ? 0x83 : 0x81;
105 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
106 if ((int8_t)offDisp == offDisp)
107 pCodeBuf[off++] = (int8_t)offDisp;
108 else
109 {
110 *(int32_t *)&pCodeBuf[off] = (int32_t)offDisp;
111 off += sizeof(int32_t);
112 }
113 }
114 else
115 {
116 /* mov tmp0, imm64 */
117 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, offDisp);
118
119 /* add [pVCpu->iem.s.uPcUpdatingDebug], tmp0 */
120 if (cBits == 64)
121 pCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 >= 8 ? X86_OP_REX_R : 0);
122 else if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
123 pCodeBuf[off++] = X86_OP_REX_R;
124 pCodeBuf[off++] = 0x01;
125 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0 & 7,
126 RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
127 }
128 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
129 return off;
130 }
131# endif
132
133 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
134 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 12);
135
136 if (pReNative->Core.fDebugPcInitialized)
137 {
138 Log4(("uPcUpdatingDebug+=%ld cBits=%d off=%#x\n", offDisp, cBits, off));
139 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
140 }
141 else
142 {
143 Log4(("uPcUpdatingDebug=rip+%ld cBits=%d off=%#x\n", offDisp, cBits, off));
144 pReNative->Core.fDebugPcInitialized = true;
145 off = iemNativeEmitLoadGprWithGstRegExT<kIemNativeGstReg_Pc>(pCodeBuf, off, idxTmpReg);
146 }
147
148 if (cBits == 64)
149 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxTmpReg, offDisp, IEMNATIVE_REG_FIXED_TMP0);
150 else
151 {
152 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxTmpReg, (int32_t)offDisp, IEMNATIVE_REG_FIXED_TMP0);
153 if (cBits == 16)
154 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, UINT16_MAX);
155 }
156
157 off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug),
158 IEMNATIVE_REG_FIXED_TMP0);
159
160 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
161 iemNativeRegFreeTmp(pReNative, idxTmpReg);
162 return off;
163}
164
165
166# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
167DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
168{
169 /* Compare the shadow with the context value, they should match. */
170 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
171 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
172 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
173 return off;
174}
175# endif
176
177#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
178
179/**
180 * Flushes delayed write of a specific guest register.
181 *
182 * This must be called prior to calling CImpl functions and any helpers that use
183 * the guest state (like raising exceptions) and such.
184 *
185 * This optimization has not yet been implemented. The first target would be
186 * RIP updates, since these are the most common ones.
187 */
188template<IEMNATIVEGSTREGREF a_enmClass>
189DECL_INLINE_THROW(uint32_t)
190iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
191{
192#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
193 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
194#endif
195
196#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
197# if 0 /** @todo r=aeichner EFLAGS writeback delay. */
198 if RT_CONSTEXPR_IF(a_enmClass == kIemNativeGstRegRef_EFlags)
199 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
200 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
201# else
202 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
203# endif
204
205 if RT_CONSTEXPR_IF(a_enmClass == kIemNativeGstRegRef_Gpr)
206 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
207 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
208#endif
209
210 if RT_CONSTEXPR_IF(a_enmClass == kIemNativeGstRegRef_XReg)
211 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
212 {
213 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
214 /* Flush the shadows as the register needs to be reloaded (there is no
215 guarantee right now, that the referenced register doesn't change). */
216 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
217
218 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
219 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
220 }
221
222 return off;
223}
224
225
226
227/*********************************************************************************************************************************
228* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
229*********************************************************************************************************************************/
230
231#undef IEM_MC_BEGIN /* unused */
/** Opens a recompiled MC block.
 *
 * Asserts that pReNative->Core.bmVars and bmStack are zero and that
 * u64ArgVars is UINT64_MAX, then stores the MC flags, the CImpl flags and the
 * argument count (hidden ones included) in the recompiler state.  The brace
 * opened here is closed by IEM_MC_END(). */
232#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
233 { \
234 Assert(pReNative->Core.bmVars == 0); \
235 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
236 Assert(pReNative->Core.bmStack == 0); \
237 pReNative->fMc = (a_fMcFlags); \
238 pReNative->fCImpl = (a_fCImplFlags); \
239 pReNative->cArgsX = (a_cArgsIncludingHidden)
240
241/** Closes the block opened by IEM_MC_BEGIN_EX(): frees all variables and
 * returns the current code buffer offset.
 *
 * We have to get to the end in recompilation mode, as otherwise we won't
 * generate code for all the IEM_MC_IF_XXX branches. */
243#define IEM_MC_END() \
244 iemNativeVarFreeAll(pReNative); \
245 } return off
246
247
248
249/*********************************************************************************************************************************
250* Liveness Stubs *
251*********************************************************************************************************************************/
252
/* All the liveness annotation macros expand to a no-op expression here. */
253#define IEM_MC_LIVENESS_GREG_INPUT(a_iGReg) ((void)0)
254#define IEM_MC_LIVENESS_GREG_CLOBBER(a_iGReg) ((void)0)
255#define IEM_MC_LIVENESS_GREG_MODIFY(a_iGReg) ((void)0)
256
257#define IEM_MC_LIVENESS_MREG_INPUT(a_iMReg) ((void)0)
258#define IEM_MC_LIVENESS_MREG_CLOBBER(a_iMReg) ((void)0)
259#define IEM_MC_LIVENESS_MREG_MODIFY(a_iMReg) ((void)0)
260
261#define IEM_MC_LIVENESS_XREG_INPUT(a_iXReg) ((void)0)
262#define IEM_MC_LIVENESS_XREG_CLOBBER(a_iXReg) ((void)0)
263#define IEM_MC_LIVENESS_XREG_MODIFY(a_iXReg) ((void)0)
264
265#define IEM_MC_LIVENESS_MXCSR_INPUT() ((void)0)
266#define IEM_MC_LIVENESS_MXCSR_CLOBBER() ((void)0)
267#define IEM_MC_LIVENESS_MXCSR_MODIFY() ((void)0)
268
269
270/*********************************************************************************************************************************
271* Native Emitter Support. *
272*********************************************************************************************************************************/
273
/** Opens a block that is only entered when RT_ARCH_VAL has at least one bit
 * in common with @a a_fSupportedHosts. */
274#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
275
/** Starts the alternative to the block opened by IEM_MC_NATIVE_IF. */
276#define IEM_MC_NATIVE_ELSE() } else {
277
/** Closes the construct opened by IEM_MC_NATIVE_IF. */
278#define IEM_MC_NATIVE_ENDIF() } ((void)0)
279
280
/* IEM_MC_NATIVE_EMIT_<n>: Calls the emitter @a a_fnEmitter with the recompiler
   state, the current code buffer offset and <n> additional arguments, and
   stores the return value in 'off'. */
281#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
282 off = a_fnEmitter(pReNative, off)
283
284#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
285 off = a_fnEmitter(pReNative, off, (a0))
286
287#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
288 off = a_fnEmitter(pReNative, off, (a0), (a1))
289
/** Like IEM_MC_NATIVE_EMIT_2, but passes pCallEntry->idxInstr between 'off'
 * and the two arguments. */
290#define IEM_MC_NATIVE_EMIT_2_EX(a_fnEmitter, a0, a1) \
291 off = a_fnEmitter(pReNative, off, pCallEntry->idxInstr, (a0), (a1))
292
293#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
294 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
295
296#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
297 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
298
299#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
300 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
301
302#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
303 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
304
305#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
306 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
307
308#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
309 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
310
311
312#ifndef RT_ARCH_AMD64
/** Does nothing when not building for an AMD64 host. */
313# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
314#else
315/** Assigns the host register @a a_idxHostReg to the local variable
 * @a a_VarNm by way of iemNativeVarSetAmd64HostRegisterForLocal.
 *
 * @note This is a naive approach that ASSUMES that the register isn't
 *       allocated, so it only works safely for the first allocation(s) in
 *       a MC block. */
318# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
319 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
320
/* Declaration of the worker called by iemNativeVarSetAmd64HostRegisterForLocal. */
321DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg,
322 uint32_t off, bool fAllocated);
323
324DECL_INLINE_THROW(uint32_t)
325iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
326{
327 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
328 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
329 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
330
331# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
332 /* Must flush the register if it hold pending writes. */
333 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
334 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
335 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
336# endif
337
338 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off, false /*fAllocated*/);
339 return off;
340}
341
342#endif /* RT_ARCH_AMD64 */
343
344
345
346/*********************************************************************************************************************************
347* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
348*********************************************************************************************************************************/
349
/** Clears pReNative->fMc, stores @a a_fFlags in pReNative->fCImpl and returns
 * the result of iemNativeEmitCImplCall0 for @a a_pfnCImpl (no arguments). */
350#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
351 pReNative->fMc = 0; \
352 pReNative->fCImpl = (a_fFlags); \
353 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
354 a_cbInstr) /** @todo not used ... */
355
356
/** Clears pReNative->fMc, stores @a a_fFlags in pReNative->fCImpl and returns
 * the result of iemNativeEmitCImplCall1 for @a a_pfnCImpl with one argument. */
357#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
358 pReNative->fMc = 0; \
359 pReNative->fCImpl = (a_fFlags); \
360 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
361
/** Thin wrapper around iemNativeEmitCImplCall: passes 1 (presumably the
 * argument count - confirm against iemNativeEmitCImplCall), @a uArg0 and
 * zeros for the two remaining argument values. */
362DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
363 uint8_t idxInstr, uint64_t a_fGstShwFlush,
364 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
365{
366 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
367}
368
369
/** Clears pReNative->fMc, stores @a a_fFlags in pReNative->fCImpl and returns
 * the result of iemNativeEmitCImplCall2 for @a a_pfnCImpl with two arguments. */
370#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
371 pReNative->fMc = 0; \
372 pReNative->fCImpl = (a_fFlags); \
373 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
374 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
375
/** Thin wrapper around iemNativeEmitCImplCall: passes 2 (presumably the
 * argument count - confirm against iemNativeEmitCImplCall), @a uArg0,
 * @a uArg1 and zero for the remaining argument value. */
376DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
377 uint8_t idxInstr, uint64_t a_fGstShwFlush,
378 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
379{
380 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
381}
382
383
/** Clears pReNative->fMc, stores @a a_fFlags in pReNative->fCImpl and returns
 * the result of iemNativeEmitCImplCall3 for @a a_pfnCImpl with three arguments. */
384#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
385 pReNative->fMc = 0; \
386 pReNative->fCImpl = (a_fFlags); \
387 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
388 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
389
/** Thin wrapper around iemNativeEmitCImplCall: passes 3 (presumably the
 * argument count - confirm against iemNativeEmitCImplCall) followed by
 * @a uArg0, @a uArg1 and @a uArg2. */
390DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
391 uint8_t idxInstr, uint64_t a_fGstShwFlush,
392 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
393 uint64_t uArg2)
394{
395 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
396}
397
398
399
400/*********************************************************************************************************************************
401* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
402*********************************************************************************************************************************/
403
404/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
405 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
406DECL_INLINE_THROW(uint32_t)
407iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
408{
409 /*
410 * If its not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
411 * return with special status code and make the execution loop deal with
412 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
413 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
414 * could continue w/o interruption, it probably will drop into the
415 * debugger, so not worth the effort of trying to services it here and we
416 * just lump it in with the handling of the others.
417 *
418 * To simplify the code and the register state management even more (wrt
419 * immediate in AND operation), we always update the flags and skip the
420 * extra check associated conditional jump.
421 */
422 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
423 <= UINT32_MAX);
424#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
425 AssertMsg( pReNative->idxCurCall == 0
426 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
427 IEMLIVENESSBIT_IDX_EFL_OTHER)),
428 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
429 IEMLIVENESSBIT_IDX_EFL_OTHER)));
430#endif
431
432 /*
433 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label
434 * any pending register writes must be flushed.
435 */
436 off = iemNativeRegFlushPendingWrites(pReNative, off);
437
438 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
439 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER),
440 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER));
441 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_ReturnWithFlags>(pReNative, off, idxEflReg,
442 X86_EFL_TF
443 | CPUMCTX_DBG_HIT_DRX_MASK
444 | CPUMCTX_DBG_DBGF_MASK);
445 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
446 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_EFlags>(pReNative, off, idxEflReg);
447
448 /* Free but don't flush the EFLAGS register. */
449 iemNativeRegFreeTmp(pReNative, idxEflReg);
450
451 return off;
452}
453
454
455/** Helper for iemNativeEmitFinishInstructionWithStatus. */
456DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
457{
458 unsigned const offOpcodes = pCallEntry->offOpcode;
459 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
460 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
461 {
462 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
463 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
464 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
465 }
466 AssertFailedReturn(NIL_RTGCPHYS);
467}
468
469
470/**
 * Emits the instruction finishing code for the status @a a_rcNormal.
 *
 * For VINF_SUCCESS this is a dummy that emits nothing and returns @a off
 * unchanged.  For VINF_IEM_REEXEC_BREAK it flushes the pending register
 * writes, marks the current conditional branch as exiting and emits a TB exit
 * to one of the ReturnBreakViaLookup label variants.
 *
 * @returns New code buffer offset.
 * @tparam  a_rcNormal  VINF_SUCCESS or VINF_IEM_REEXEC_BREAK (checked at
 *                      compile time).
 * @tparam  a_fIsJump   Whether @a offJump is added when calculating the
 *                      physical address of the next instruction.
 * @param   pReNative   The native recompile state.
 * @param   off         The code buffer offset.
 * @param   pCallEntry  The call entry; idxInstr, uTbLookup, offOpcode and
 *                      cbOpcode are used.
 * @param   offJump     The jump displacement, ignored unless @a a_fIsJump is
 *                      true.
 */
471template<int const a_rcNormal, bool const a_fIsJump>
472DECL_FORCE_INLINE_THROW(uint32_t)
473iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
474 int32_t const offJump)
475{
476 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
477 if (a_rcNormal != VINF_SUCCESS)
478 {
479#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
480 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
481#else
482 RT_NOREF_PV(pCallEntry);
483#endif
484
485 /* As this code returns from the TB any pending register writes must be flushed. */
486 off = iemNativeRegFlushPendingWrites(pReNative, off);
487
488 /*
489 * If we're in a conditional, mark the current branch as exiting so we
490 * can disregard its state when we hit the IEM_MC_ENDIF.
491 */
492 iemNativeMarkCurCondBranchAsExiting(pReNative);
493
494 /*
495 * Use the lookup table for getting to the next TB quickly.
496 * Note! In this code path there can only be one entry at present.
497 */
498 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
499 PCIEMTB const pTbOrg = pReNative->pTbOrg;
500 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
501 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
502
503#if 0
504 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
505 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
506 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
507 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
508 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
509
510 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
511
512#else
513 /* Load the index as argument #1 for the helper call at the given label. */
514 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
515
516 /*
517 * Figure out the physical address of the current instruction and see
518 * whether the next instruction we're about to execute is in the same
519 * page so we can optimistically skip TLB loading.
520 *
521 * - This is safe for all cases in FLAT mode.
522 * - In segmented modes it is complicated, given that a negative
523 * jump may underflow EIP and a forward jump may overflow or run into
524 * CS.LIM and triggering a #GP. The only thing we can get away with
525 * now at compile time is forward jumps w/o CS.LIM checks, since the
526 * lack of CS.LIM checks means we're good for the entire physical page
527 * we're executing on and another 15 bytes before we run into CS.LIM.
528 */
529 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
530# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
531 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
532# endif
533 )
534 {
535 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
536 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
 /* Both addresses must be in the same guest page and the current
    instruction must not extend beyond the end of its page. */
537 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
538 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
539
540 {
541 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
542 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
543
544 /* Load the key lookup flags into the 2nd argument for the helper call.
545 - This is safe wrt CS limit checking since we're only here for FLAT modes.
546 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
547 interrupt shadow.
548 - The NMI inhibiting is more questionable, though... */
549 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
550 * Should we copy it into fExec to simplify this? OTOH, it's just a
551 * couple of extra instructions if EFLAGS are already in a register. */
552 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
553 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
554
 /* The ...WithIrq exit is taken when idxLastCheckIrqCallNo is
    still UINT32_MAX (presumably: no IRQ check call seen so far in
    this TB - confirm against where the field is set). */
555 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
556 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookup>(pReNative, off);
557 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithIrq>(pReNative, off);
558 }
559 }
 /* Otherwise use the exit variants that include the TLB lookup. */
560 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
561 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithTlb>(pReNative, off);
562 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq>(pReNative, off);
563#endif
564 }
565 return off;
566}
567
568
569#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
570 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
571 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
572
573#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
574 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
575 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
576 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
577
578/** Same as iemRegAddToRip64AndFinishingNoFlags. */
579DECL_INLINE_THROW(uint32_t)
580iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
581{
582#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
583# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
584 if (!pReNative->Core.offPc)
585 off = iemNativeEmitLoadGprWithGstShadowRegT<kIemNativeGstReg_Pc>(pNative, off, IEMNATIVE_REG_FIXED_PC_DBG);
586# endif
587
588 /* Allocate a temporary PC register. */
589 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
590
591 /* Perform the addition and store the result. */
592 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
593 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
594
595 /* Free but don't flush the PC register. */
596 iemNativeRegFreeTmp(pReNative, idxPcReg);
597#endif
598
599#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
600 pReNative->Core.offPc += cbInstr;
601 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
602# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
603 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 64);
604 off = iemNativeEmitPcDebugCheck(pReNative, off);
605# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
606 off = iemNativePcAdjustCheck(pReNative, off);
607# endif
608 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
609#endif
610
611 return off;
612}
613
614
615#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
616 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
617 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
618
619#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
620 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
621 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
622 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
623
624/** Same as iemRegAddToEip32AndFinishingNoFlags. */
625DECL_INLINE_THROW(uint32_t)
626iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
627{
628#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
629# ifdef IEMNATIVE_REG_FIXED_PC_DBG
630 if (!pReNative->Core.offPc)
631 off = iemNativeEmitLoadGprWithGstShadowRegT<kIemNativeGstReg_Pc>(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG);
632# endif
633
634 /* Allocate a temporary PC register. */
635 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
636
637 /* Perform the addition and store the result. */
638 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
639 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
640
641 /* Free but don't flush the PC register. */
642 iemNativeRegFreeTmp(pReNative, idxPcReg);
643#endif
644
645#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
646 pReNative->Core.offPc += cbInstr;
647 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
648# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
649 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 32);
650 off = iemNativeEmitPcDebugCheck(pReNative, off);
651# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
652 off = iemNativePcAdjustCheck(pReNative, off);
653# endif
654 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
655#endif
656
657 return off;
658}
659
660
661#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
662 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
663 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
664
665#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
666 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
667 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
668 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
669
670/** Same as iemRegAddToIp16AndFinishingNoFlags. */
671DECL_INLINE_THROW(uint32_t)
672iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
673{
674#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
675# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
676 if (!pReNative->Core.offPc)
677 off = iemNativeEmitLoadGprWithGstShadowRegT<kIemNativeGstReg_Pc>(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG);
678# endif
679
680 /* Allocate a temporary PC register. */
681 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
682
683 /* Perform the addition and store the result. */
684 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
685 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
686 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
687
688 /* Free but don't flush the PC register. */
689 iemNativeRegFreeTmp(pReNative, idxPcReg);
690#endif
691
692#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
693 pReNative->Core.offPc += cbInstr;
694 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
695# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
696 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 16);
697 off = iemNativeEmitPcDebugCheck(pReNative, off);
698# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
699 off = iemNativePcAdjustCheck(pReNative, off);
700# endif
701 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
702#endif
703
704 return off;
705}
706
707
708/*********************************************************************************************************************************
709* Common code for changing PC/RIP/EIP/IP. *
710*********************************************************************************************************************************/
711
712/**
713 * Emits code to check if the content of @a idxAddrReg is a canonical address,
714 * raising a \#GP(0) if it isn't.
715 *
716 * @returns New code buffer offset, UINT32_MAX on failure.
717 * @param pReNative The native recompile state.
718 * @param off The code buffer offset.
719 * @param idxAddrReg The host register with the address to check.
720 * @param idxInstr The current instruction.
721 */
722DECL_FORCE_INLINE_THROW(uint32_t)
723iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
724{
725 /*
726 * Make sure we don't have any outstanding guest register writes as we may
727 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
728 */
729 off = iemNativeRegFlushPendingWrites(pReNative, off);
730
731#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
732 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
733#else
734 RT_NOREF(idxInstr);
735#endif
736
737#ifdef RT_ARCH_AMD64
738 /*
739 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
740 * return raisexcpt();
741 * ---- this variant avoid loading a 64-bit immediate, but is an instruction longer.
742 */
743 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
744
745 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
746 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
747 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
748 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
749 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
750
751 iemNativeRegFreeTmp(pReNative, iTmpReg);
752
753#elif defined(RT_ARCH_ARM64)
754 /*
755 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
756 * return raisexcpt();
757 * ----
758 * mov x1, 0x800000000000
759 * add x1, x0, x1
760 * cmp xzr, x1, lsr 48
761 * b.ne .Lraisexcpt
762 */
763 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
764
765 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
766 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
767 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
768 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
769
770 iemNativeRegFreeTmp(pReNative, iTmpReg);
771
772#else
773# error "Port me"
774#endif
775 return off;
776}
777
778
779/**
780 * Emits code to check if the content of @a idxAddrReg is a canonical address,
781 * raising a \#GP(0) if it isn't.
782 *
783 * Caller makes sure everything is flushed, except maybe PC.
784 *
785 * @returns New code buffer offset, UINT32_MAX on failure.
786 * @param pReNative The native recompile state.
787 * @param off The code buffer offset.
788 * @param idxAddrReg The host register with the address to check.
789 * @param offDisp The relative displacement that has already been
790 * added to idxAddrReg and must be subtracted if
791 * raising a \#GP(0).
792 * @param idxInstr The current instruction.
793 */
794DECL_FORCE_INLINE_THROW(uint32_t)
795iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
796 uint8_t idxAddrReg, int64_t offDisp, uint8_t idxInstr)
797{
798#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
799 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
800#endif
801
802#ifdef RT_ARCH_AMD64
803 /*
804 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
805 * return raisexcpt();
806 * ---- this variant avoid loading a 64-bit immediate, but is an instruction longer.
807 */
808 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
809
810 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
811 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
812 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
813 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
814
815#elif defined(RT_ARCH_ARM64)
816 /*
817 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
818 * return raisexcpt();
819 * ----
820 * mov x1, 0x800000000000
821 * add x1, x0, x1
822 * cmp xzr, x1, lsr 48
823 * b.ne .Lraisexcpt
824 */
825 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
826
827 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
828 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
829 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
830#else
831# error "Port me"
832#endif
833
834 /* Jump to the #GP code (hoping static prediction considers forward branches as not-taken). */
835 uint32_t const offFixup1 = off;
836 off = iemNativeEmitJnzToFixed(pReNative, off, off /*8-bit jump suffices*/);
837
838 /* jump .Lnoexcept; Skip the #GP code. */
839 uint32_t const offFixup2 = off;
840 off = iemNativeEmitJmpToFixed(pReNative, off, off /*8-bit jump suffices*/);
841
842 /* .Lraisexcpt: */
843 iemNativeFixupFixedJump(pReNative, offFixup1, off);
844#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
845 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
846#else
847 RT_NOREF(idxInstr);
848#endif
849
850 /* Undo the PC adjustment and store the old PC value. */
851 off = iemNativeEmitSubGprImm(pReNative, off, idxAddrReg, offDisp, iTmpReg);
852 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxAddrReg);
853
854 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
855
856 /* .Lnoexcept: */
857 iemNativeFixupFixedJump(pReNative, offFixup2, off);
858
859 iemNativeRegFreeTmp(pReNative, iTmpReg);
860 return off;
861}
862
863
864/**
865 * Emits code to check if the content of @a idxAddrReg is a canonical address,
866 * raising a \#GP(0) if it isn't.
867 *
868 * Caller makes sure everything is flushed, except maybe PC.
869 *
870 * @returns New code buffer offset, UINT32_MAX on failure.
871 * @param pReNative The native recompile state.
872 * @param off The code buffer offset.
873 * @param idxAddrReg The host register with the address to check.
874 * @param idxOldPcReg Register holding the old PC that offPc is relative
875 * to if available, otherwise UINT8_MAX.
876 * @param idxInstr The current instruction.
877 */
878DECL_FORCE_INLINE_THROW(uint32_t)
879iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
880 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
881{
882#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
883 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
884#endif
885
886#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
887# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
888 if (!pReNative->Core.offPc)
889# endif
890 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
891#else
892 RT_NOREF(idxInstr);
893#endif
894
895#ifdef RT_ARCH_AMD64
896 /*
897 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
898 * return raisexcpt();
899 * ---- this variant avoid loading a 64-bit immediate, but is an instruction longer.
900 */
901 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
902
903 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
904 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
905 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
906 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
907
908#elif defined(RT_ARCH_ARM64)
909 /*
910 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
911 * return raisexcpt();
912 * ----
913 * mov x1, 0x800000000000
914 * add x1, x0, x1
915 * cmp xzr, x1, lsr 48
916 * b.ne .Lraisexcpt
917 */
918 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
919
920 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
921 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
922 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
923#else
924# error "Port me"
925#endif
926
927#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
928 if (pReNative->Core.offPc)
929 {
930 /** @todo On x86, it is said that conditional jumps forward are statically
931 * predicited as not taken, so this isn't a very good construct.
932 * Investigate whether it makes sense to invert it and add another
933 * jump. Also, find out wtf the static predictor does here on arm! */
934 uint32_t const offFixup = off;
935 off = iemNativeEmitJzToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
936
937 /* .Lraisexcpt: */
938# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
939 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
940# endif
941 /* We need to update cpum.GstCtx.rip. */
942 if (idxOldPcReg == UINT8_MAX)
943 {
944 idxOldPcReg = iTmpReg;
945 off = iemNativeEmitLoadGprWithGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxOldPcReg);
946 }
947 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
948 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxOldPcReg);
949
950 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
951 iemNativeFixupFixedJump(pReNative, offFixup, off);
952 }
953 else
954#endif
955 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
956
957 iemNativeRegFreeTmp(pReNative, iTmpReg);
958
959 return off;
960}
961
962
963/**
964 * Emits code to check if that the content of @a idxAddrReg is within the limit
965 * of CS, raising a \#GP(0) if it isn't.
966 *
967 * @returns New code buffer offset; throws VBox status code on error.
968 * @param pReNative The native recompile state.
969 * @param off The code buffer offset.
970 * @param idxAddrReg The host register (32-bit) with the address to
971 * check.
972 * @param idxInstr The current instruction.
973 */
974DECL_FORCE_INLINE_THROW(uint32_t)
975iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
976 uint8_t idxAddrReg, uint8_t idxInstr)
977{
978 /*
979 * Make sure we don't have any outstanding guest register writes as we may
980 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
981 */
982 off = iemNativeRegFlushPendingWrites(pReNative, off);
983
984#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
985 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
986#else
987 RT_NOREF(idxInstr);
988#endif
989
990 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
991 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
992 kIemNativeGstRegUse_ReadOnly);
993
994 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
995 off = iemNativeEmitTbExitJa<kIemNativeLabelType_RaiseGp0>(pReNative, off);
996
997 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
998 return off;
999}
1000
1001
1002
1003
1004/**
1005 * Emits code to check if that the content of @a idxAddrReg is within the limit
1006 * of CS, raising a \#GP(0) if it isn't.
1007 *
1008 * Caller makes sure everything is flushed, except maybe PC.
1009 *
1010 * @returns New code buffer offset; throws VBox status code on error.
1011 * @param pReNative The native recompile state.
1012 * @param off The code buffer offset.
1013 * @param idxAddrReg The host register (32-bit) with the address to
1014 * check.
1015 * @param idxOldPcReg Register holding the old PC that offPc is relative
1016 * to if available, otherwise UINT8_MAX.
1017 * @param idxInstr The current instruction.
1018 */
1019DECL_FORCE_INLINE_THROW(uint32_t)
1020iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1021 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
1022{
1023#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1024 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1025#endif
1026
1027#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1028# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1029 if (!pReNative->Core.offPc)
1030# endif
1031 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1032#else
1033 RT_NOREF(idxInstr);
1034#endif
1035
1036 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
1037 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
1038 kIemNativeGstRegUse_ReadOnly);
1039
1040 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
1041#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1042 if (pReNative->Core.offPc)
1043 {
1044 uint32_t const offFixup = off;
1045 off = iemNativeEmitJbeToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
1046
1047 /* Raising a GP(0), but first we need to update cpum.GstCtx.rip. */
1048 if (idxOldPcReg == UINT8_MAX)
1049 {
1050 idxOldPcReg = idxAddrReg;
1051 off = iemNativeEmitLoadGprWithGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxOldPcReg);
1052 }
1053 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
1054 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxOldPcReg);
1055# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1056 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1057# endif
1058 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
1059 iemNativeFixupFixedJump(pReNative, offFixup, off);
1060 }
1061 else
1062#endif
1063 off = iemNativeEmitTbExitJa<kIemNativeLabelType_RaiseGp0>(pReNative, off);
1064
1065 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1066 return off;
1067}
1068
1069
1070/*********************************************************************************************************************************
1071* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
1072*********************************************************************************************************************************/
1073
/*
 * Relative jumps updating a 64-bit RIP.
 *
 * Each macro expands to several statements assigning to 'off' and expects
 * pReNative, off and pCallEntry to be in scope at the expansion site.  The
 * displacement is cast to its nominal width before being passed on.  The S16
 * variants always pass IEMMODE_16BIT and the S32 variants IEMMODE_64BIT as
 * effective operand size.  The _WITH_FLAGS variants additionally emit
 * iemNativeEmitFinishInstructionFlagsCheck() before the final status handling.
 */

/* General variants: a_fWithinPage = false. */
#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
    off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), \
                                                                   (int8_t)(a_i8), (a_enmEffOpSize), \
                                                                   pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (int8_t)(a_i8))

#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
    off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), \
                                                                   (int8_t)(a_i8), (a_enmEffOpSize), \
                                                                   pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (int8_t)(a_i8))

#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), \
                                                                   (int16_t)(a_i16), IEMMODE_16BIT, \
                                                                   pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (int16_t)(a_i16))

#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), \
                                                                   (int16_t)(a_i16), IEMMODE_16BIT, \
                                                                   pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (int16_t)(a_i16))

#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), \
                                                                   (a_i32), IEMMODE_64BIT, \
                                                                   pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (a_i32))

#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), \
                                                                   (a_i32), IEMMODE_64BIT, \
                                                                   pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (a_i32))


/* Intra-page variants: a_fWithinPage = true. */
#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
    off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), \
                                                                  (int8_t)(a_i8), (a_enmEffOpSize), \
                                                                  pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (int8_t)(a_i8))

#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
    off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), \
                                                                  (int8_t)(a_i8), (a_enmEffOpSize), \
                                                                  pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (int8_t)(a_i8))

#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG(a_i16, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), \
                                                                  (int16_t)(a_i16), IEMMODE_16BIT, \
                                                                  pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (int16_t)(a_i16))

#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), \
                                                                  (int16_t)(a_i16), IEMMODE_16BIT, \
                                                                  pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (int16_t)(a_i16))

#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG(a_i32, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), \
                                                                  (a_i32), IEMMODE_64BIT, \
                                                                  pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (a_i32))

#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), \
                                                                  (a_i32), IEMMODE_64BIT, \
                                                                  pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (a_i32))
1140
1141/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
1142 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
1143 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
1144template<bool const a_fWithinPage>
1145DECL_INLINE_THROW(uint32_t)
1146iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1147 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1148{
1149 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
1150#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1151 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1152 if (a_fWithinPage && enmEffOpSize == IEMMODE_64BIT)
1153 {
1154 /* No #GP checking required, just update offPc and get on with it. */
1155 pReNative->Core.offPc += (int64_t)offDisp + cbInstr;
1156# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1157 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1158# endif
1159 }
1160 else
1161#endif
1162 {
1163 /* Flush all but PC iff we're doing a 64-bit update here and this isn't within a page.. */
1164 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT && !a_fWithinPage))
1165 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1166
1167 /* Allocate a temporary PC register. */
1168 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1169 kIemNativeGstRegUse_ForUpdate);
1170
1171 /* Perform the addition. */
1172 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr + pReNative->Core.offPc);
1173
1174 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
1175 {
1176 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't.
1177 We can skip this if the target is within the same page. */
1178 if (!a_fWithinPage)
1179 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(pReNative, off, idxPcReg,
1180 (int64_t)offDisp + cbInstr, idxInstr);
1181 }
1182 else
1183 {
1184 /* Just truncate the result to 16-bit IP. */
1185 Assert(enmEffOpSize == IEMMODE_16BIT);
1186 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1187 }
1188
1189#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1190# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1191 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1192 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1193# endif
1194 /* Since we've already got the new PC value in idxPcReg, we can just as
1195 well write it out and reset offPc to zero. Otherwise, we'd need to use
1196 a copy the shadow PC, which will cost another move instruction here. */
1197# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS)
1198 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
1199 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
1200 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
1201 Log4(("iemNativeEmitRip64RelativeJumpAndFinishingNoFlags: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u cCondDepth=%d\n",
1202 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped, pReNative->cCondDepth));
1203 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
1204# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1205 iemNativeDbgInfoAddNativeOffset(pReNative, off);
1206 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
1207# endif
1208# endif
1209 pReNative->Core.offPc = 0;
1210#endif
1211
1212 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
1213
1214 /* Free but don't flush the PC register. */
1215 iemNativeRegFreeTmp(pReNative, idxPcReg);
1216 }
1217 return off;
1218}
1219
1220
/*
 * Relative jumps updating a 32-bit EIP.
 *
 * Each macro expands to several statements assigning to 'off' and expects
 * pReNative, off and pCallEntry to be in scope at the expansion site.  The
 * displacement is cast to its nominal width before being passed on.  The S16
 * variants always pass IEMMODE_16BIT and the S32 variants IEMMODE_32BIT as
 * effective operand size.  The _WITH_FLAGS variants additionally emit
 * iemNativeEmitFinishInstructionFlagsCheck() before the final status handling.
 */

/* General variants: a_fFlat = false. */
#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
    off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), \
                                                                   (int8_t)(a_i8), (a_enmEffOpSize), \
                                                                   pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (int8_t)(a_i8))

#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
    off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), \
                                                                   (int8_t)(a_i8), (a_enmEffOpSize), \
                                                                   pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (int8_t)(a_i8))

#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), \
                                                                   (int16_t)(a_i16), IEMMODE_16BIT, \
                                                                   pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (int16_t)(a_i16))

#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), \
                                                                   (int16_t)(a_i16), IEMMODE_16BIT, \
                                                                   pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (int16_t)(a_i16))

#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), \
                                                                   (a_i32), IEMMODE_32BIT, \
                                                                   pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (a_i32))

#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), \
                                                                   (a_i32), IEMMODE_32BIT, \
                                                                   pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (a_i32))


/* Flat variants: a_fFlat = true. */
#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
    off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), \
                                                                  (int8_t)(a_i8), (a_enmEffOpSize), \
                                                                  pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (int8_t)(a_i8))

#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
    off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), \
                                                                  (int8_t)(a_i8), (a_enmEffOpSize), \
                                                                  pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (int8_t)(a_i8))

#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT(a_i16, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), \
                                                                  (int16_t)(a_i16), IEMMODE_16BIT, \
                                                                  pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (int16_t)(a_i16))

#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), \
                                                                  (int16_t)(a_i16), IEMMODE_16BIT, \
                                                                  pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (int16_t)(a_i16))

#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT(a_i32, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), \
                                                                  (a_i32), IEMMODE_32BIT, \
                                                                  pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (a_i32))

#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), \
                                                                  (a_i32), IEMMODE_32BIT, \
                                                                  pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, \
                                                                                   (a_i32))
1287
1288/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
1289 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
1290 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
1291template<bool const a_fFlat>
1292DECL_INLINE_THROW(uint32_t)
1293iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1294 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1295{
1296 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
1297#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1298 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1299#endif
1300
1301 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1302 if (!a_fFlat || enmEffOpSize == IEMMODE_16BIT)
1303 {
1304 off = iemNativeRegFlushPendingWrites(pReNative, off);
1305#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1306 Assert(pReNative->Core.offPc == 0);
1307#endif
1308 }
1309
1310 /* Allocate a temporary PC register. */
1311 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1312
1313 /* Perform the addition. */
1314#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1315 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr + (int32_t)pReNative->Core.offPc);
1316#else
1317 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr + (int32_t)pReNative->Core.offPc);
1318#endif
1319
1320 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
1321 if (enmEffOpSize == IEMMODE_16BIT)
1322 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1323
1324 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
1325 if (!a_fFlat)
1326 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1327
1328 /* Commit it. */
1329#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1330 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, enmEffOpSize == IEMMODE_32BIT ? 32 : 16);
1331 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1332#endif
1333
1334 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
1335#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1336 pReNative->Core.offPc = 0;
1337#endif
1338
1339 /* Free but don't flush the PC register. */
1340 iemNativeRegFreeTmp(pReNative, idxPcReg);
1341
1342 return off;
1343}
1344
1345
/*
 * Relative jumps updating a 16-bit IP.
 *
 * Each macro expands to several statements assigning to 'off' and expects
 * pReNative, off and pCallEntry to be in scope at the expansion site.  The
 * S8 and S16 displacements are cast to their nominal width before being
 * passed on.  The _WITH_FLAGS variants additionally emit
 * iemNativeEmitFinishInstructionFlagsCheck() before the final status handling.
 */
#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))

#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))

#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))

#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))

/* Note: a_i32 is parenthesized in the final call too, matching the other REL_JMP macros. */
#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))

#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
    off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
    off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1372
1373/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
1374DECL_INLINE_THROW(uint32_t)
1375iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1376 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
1377{
1378 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1379 off = iemNativeRegFlushPendingWrites(pReNative, off);
1380
1381#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1382 Assert(pReNative->Core.offPc == 0);
1383 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1384#endif
1385
1386 /* Allocate a temporary PC register. */
1387 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1388
1389 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
1390 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
1391 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1392 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1393#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1394 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, 16);
1395 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1396#endif
1397 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
1398
1399 /* Free but don't flush the PC register. */
1400 iemNativeRegFreeTmp(pReNative, idxPcReg);
1401
1402 return off;
1403}
1404
1405
1406
1407/*********************************************************************************************************************************
1408* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
1409*********************************************************************************************************************************/
1410
/*
 * Threaded variants of IEM_MC_SET_RIP_U16/U32/U64_AND_FINISH.
 *
 * Every plain variant expands to a single iemNativeEmitRipJumpNoFlags call,
 * passing on the new PC argument, whether 64-bit code is targeted (f64Bit)
 * and the size of the new PC value (sizeof(uint16_t/uint32_t/uint64_t)).
 * The _WITH_FLAGS variants expand the corresponding plain variant and then
 * emit iemNativeEmitFinishInstructionFlagsCheck.
 *
 * The expansions use 'pReNative', 'off' and 'pCallEntry' from the expanding
 * scope and assign the new code buffer offset to 'off'.
 */
1411/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
1412#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
1413 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1414
1415/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
1416#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
1417 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1418
1419/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
1420#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
1421 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1422
1423/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
1424 * clears flags. */
1425#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
1426 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
1427 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1428
1429/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
1430 * clears flags. */
1431#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
1432 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
1433 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1434
1435/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
1436 * clears flags. */
1437#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
1438 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
1439 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1440
/* Drop the generic name (presumably so that only the threaded variants above
   can be used from here on - confirm against the generator). */
1441#undef IEM_MC_SET_RIP_U16_AND_FINISH
1442
1443
1444/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
1445#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
1446 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1447
1448/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
1449#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
1450 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1451
1452/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
1453 * clears flags. */
1454#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
1455 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
1456 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1457
1458/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
1459 * and clears flags. */
1460#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
1461 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
1462 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1463
1464#undef IEM_MC_SET_RIP_U32_AND_FINISH
1465
1466
/* Note: the macro parameter below is called a_u64NewEIP, but it is passed on
   with sizeof(uint64_t), i.e. it is the full 64-bit new PC value. */
1467/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
1468#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
1469 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1470
1471/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
1472 * and clears flags. */
1473#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
1474 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
1475 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1476
1477#undef IEM_MC_SET_RIP_U64_AND_FINISH
1478
1479
1480/** Same as iemRegRipJumpU16AndFinishNoFlags,
1481 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
1482DECL_INLINE_THROW(uint32_t)
1483iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
1484 uint8_t idxInstr, uint8_t cbVar)
1485{
1486 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1487 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1488
1489 /* If we can't rule out a #GP(0) below, flush all dirty register except for
1490 PC which will be handled specially by the two workers below if they raise a GP. */
1491 bool const fMayRaiseGp0 = (f64Bit && cbVar > sizeof(uint32_t)) || (!f64Bit && !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1492 uint8_t const idxOldPcReg = fMayRaiseGp0
1493 ? iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc)
1494 : UINT8_MAX;
1495 if (fMayRaiseGp0)
1496 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1497
1498 /* Get a register with the new PC loaded from idxVarPc.
1499 Note! This ASSUMES that the high bits of the GPR is zeroed. */
1500 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
1501
1502 /* Check that the target is within CS.LIM / is canonical (may #GP(0) + exit TB). */
1503 if (fMayRaiseGp0)
1504 {
1505 if (f64Bit)
1506 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1507 else
1508 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1509 }
1510
1511 /* Store the result. */
1512 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
1513
1514#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1515 pReNative->Core.offPc = 0;
1516 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1517# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1518 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1519 pReNative->Core.fDebugPcInitialized = true;
1520 Log4(("uPcUpdatingDebug=rip off=%#x\n", off));
1521# endif
1522#endif
1523
1524 if (idxOldPcReg != UINT8_MAX)
1525 iemNativeRegFreeTmp(pReNative, idxOldPcReg);
1526 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1527 /** @todo implictly free the variable? */
1528
1529 return off;
1530}
1531
1532
1533
1534/*********************************************************************************************************************************
1535* Emitters for changing PC/RIP/EIP/IP with an indirect or relative call (IEM_MC_IND_CALL_UXX_AND_FINISH, IEM_MC_REL_CALL_SXX_AND_FINISH) (requires stack emitters). *
1536*********************************************************************************************************************************/
1537
1538/** @todo These helpers belong to the stack push API naturally but we already need them up here (we could of course move
1539 * this below the stack emitters but then this is not close to the rest of the PC/RIP handling...). */
1540DECL_FORCE_INLINE_THROW(uint32_t)
1541iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1542{
1543 /* Use16BitSp: */
1544#ifdef RT_ARCH_AMD64
1545 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1546 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1547#else
1548 /* sub regeff, regrsp, #cbMem */
1549 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
1550 /* and regeff, regeff, #0xffff */
1551 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1552 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
1553 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
1554 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
1555#endif
1556 return off;
1557}
1558
1559
1560DECL_FORCE_INLINE(uint32_t)
1561iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1562{
1563 /* Use32BitSp: */
1564 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
1565 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1566 return off;
1567}
1568
1569
/**
 * Emits code that pushes the value held in @a idxRegPc onto the guest stack.
 *
 * The emitted code first updates the stack pointer (flat, or 16-bit/32-bit SP
 * selected at runtime by testing X86DESCATTR_D in the SS attributes), then
 * either stores the value inline after a TLB lookup or calls @a pfnFunction on
 * the TlbMiss path.
 *
 * @returns New code buffer offset.
 * @tparam  a_cBitsVar   Width of the pushed value in bits: 16, 32 or 64.
 * @tparam  a_cBitsFlat  0 for the SS-relative variant, otherwise 32 or 64 for
 *                       the flat variants.
 * @param   pReNative    The native recompile state.
 * @param   off          The current code buffer offset.
 * @param   idxRegPc     Host register holding the value to push.
 * @param   pfnFunction  Helper called on the TlbMiss path.  Strict builds
 *                       assert that it matches the template arguments.
 * @param   idxInstr     The instruction number.  Stored in iem.s.idxTbCurInstr
 *                       before the helper call when
 *                       IEMNATIVE_WITH_INSTRUCTION_COUNTING is defined, unused
 *                       otherwise.
 */
1570template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat>
1571DECL_INLINE_THROW(uint32_t)
1572iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
1573 uintptr_t pfnFunction, uint8_t idxInstr)
1574{
1575 AssertCompile(a_cBitsVar == 16 || a_cBitsVar == 32 || a_cBitsVar == 64);
1576 AssertCompile(a_cBitsFlat == 0 || a_cBitsFlat == 32 || a_cBitsFlat == 64);
1577
1578 /*
1579 * Assert sanity.
1580 */
1581#ifdef VBOX_STRICT
1582 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
1583 {
1584 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
1585 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
1586 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
1587 Assert( pfnFunction
1588 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 32) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1589 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 32) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
1590 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 64) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1591 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 64) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
1592 : UINT64_C(0xc000b000a0009000) ));
1593 }
1594 else
1595 Assert( pfnFunction
1596 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
1597 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
1598 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
1599 : UINT64_C(0xc000b000a0009000) ));
1600#endif
1601
1602#ifdef VBOX_STRICT
1603 /*
1604 * Check that the fExec flags we've got make sense.
1605 */
1606 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1607#endif
1608
1609 /*
1610 * To keep things simple we have to commit any pending writes first as we
1611 * may end up making calls.
1612 */
1613 /** @todo we could postpone this till we make the call and reload the
1614 * registers after returning from the call. Not sure if that's sensible or
1615 * not, though. */
1616 off = iemNativeRegFlushPendingWrites(pReNative, off);
1617
1618 /*
1619 * First we calculate the new RSP and the effective stack pointer value.
1620 * For 64-bit mode and flat 32-bit these two are the same.
1621 * (Code structure is very similar to that of PUSH)
1622 */
1623 RT_CONSTEXPR
1624 uint8_t const cbMem = a_cBitsVar / 8;
1625 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1626 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1627 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
    /* Offset of the conditional jump that gets patched (iemNativeFixupFixedJump)
       once the code for the other SP width has been emitted; only used by the
       non-flat variant. */
1628 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1629 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
1630 {
1631 Assert(idxRegEffSp == idxRegRsp);
1632 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
1633 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1634 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
1635 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1636 else
1637 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1638 }
1639 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1640 {
1641 Assert(idxRegEffSp != idxRegRsp);
1642 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1643 kIemNativeGstRegUse_ReadOnly);
1644#ifdef RT_ARCH_AMD64
1645 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1646#else
1647 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1648#endif
1649 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1650 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1651 offFixupJumpToUseOtherBitSp = off;
    /* The SP width matching the current CPU mode is emitted inline here; the
       other width is emitted further down and reached via the jump below,
       which targets itself until it is fixed up. */
1652 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1653 {
1654 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1655 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1656 }
1657 else
1658 {
1659 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1660 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1661 }
1662 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1663 }
1664 /* SpUpdateEnd: */
1665 uint32_t const offLabelSpUpdateEnd = off;
1666
1667 /*
1668 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1669 * we're skipping lookup).
1670 */
1671 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1672 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
1673 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1674 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1675 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1676 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1677 : UINT32_MAX;
1678 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1679
1680
1681 if (!TlbState.fSkip)
1682 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1683 else
1684 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1685
1686 /*
1687 * Use16BitSp:
1688 */
    /* Note: this is the target of the conditional jump recorded in
       offFixupJumpToUseOtherBitSp, i.e. the SP width NOT emitted inline above
       (16-bit SP when in 32-bit mode, 32-bit SP otherwise).  It jumps back to
       offLabelSpUpdateEnd when done. */
1689 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
1690 {
1691#ifdef RT_ARCH_AMD64
1692 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1693#else
1694 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1695#endif
1696 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1697 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1698 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1699 else
1700 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1701 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1702 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1703 }
1704
1705 /*
1706 * TlbMiss:
1707 *
1708 * Call helper to do the pushing.
1709 */
1710 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1711
1712#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1713 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1714#else
1715 RT_NOREF(idxInstr);
1716#endif
1717
1718 /* Save variables in volatile registers. */
1719 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1720 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1721 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1722 | (RT_BIT_32(idxRegPc));
1723 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1724
    /* Load the helper arguments: ARG1 = effective stack pointer, ARG2 = value
       to push.  The three cases below order the copies so that neither source
       register is overwritten before it has been read. */
1725 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1726 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1727 {
1728 /* Swap them using ARG0 as temp register: */
1729 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1730 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1731 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1732 }
1733 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1734 {
1735 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1736 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1737
1738 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1739 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1740 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1741 }
1742 else
1743 {
1744 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1745 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1746
1747 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1748 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1749 }
1750
1751#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
1752 /* Do delayed EFLAGS calculations. */
1753 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
1754 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
1755#endif
1756
1757 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1758 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1759
1760 /* Done setting up parameters, make the call. */
1761 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
1762
1763 /* Restore variables and guest shadow registers to volatile registers. */
1764 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1765 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1766
1767#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1768 if (!TlbState.fSkip)
1769 {
1770 /* end of TlbMiss - Jump to the done label. */
1771 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1772 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1773
1774 /*
1775 * TlbLookup:
1776 */
1777 off = iemNativeEmitTlbLookup<true, cbMem, cbMem - 1, IEM_ACCESS_TYPE_WRITE>(pReNative, off, &TlbState, iSegReg,
1778 idxLabelTlbLookup, idxLabelTlbMiss,
1779 idxRegMemResult);
1780
1781 /*
1782 * Emit code to do the actual storing / fetching.
1783 */
1784 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1785# ifdef IEM_WITH_TLB_STATISTICS
1786 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1787 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1788# endif
1789 AssertCompile(cbMem == 2 || cbMem == 4 || cbMem == 8);
    /* Store idxRegPc at the address the TLB lookup left in idxRegMemResult. */
1790 if RT_CONSTEXPR_IF(cbMem == 2)
1791 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1792 else if RT_CONSTEXPR_IF(cbMem == 4)
1793 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1794 else
1795 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1796
1797 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1798 TlbState.freeRegsAndReleaseVars(pReNative);
1799
1800 /*
1801 * TlbDone:
1802 *
1803 * Commit the new RSP value.
1804 */
1805 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1806 }
1807#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1808
    /* Without delayed register writeback the new RSP is stored right away. */
1809#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1810 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Rsp>(pReNative, off, idxRegRsp);
1811#endif
1812 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1813 if (idxRegEffSp != idxRegRsp)
1814 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1815
1816 return off;
1817}
1818
1819
/*
 * Threaded variants of IEM_MC_IND_CALL_U16/U32/U64_AND_FINISH.
 *
 * Every plain variant expands to a single iemNativeEmitRipIndirectCallNoFlags
 * call, passing on the instruction length, the new PC argument, whether
 * 64-bit code is targeted (f64Bit) and the size of the new PC value.  The
 * _WITH_FLAGS variants expand the corresponding plain variant and then emit
 * iemNativeEmitFinishInstructionFlagsCheck.
 *
 * The expansions use 'pReNative', 'off' and 'pCallEntry' from the expanding
 * scope and assign the new code buffer offset to 'off'.
 */
1820/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1821#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1822 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1823
1824/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1825 * clears flags. */
1826#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1827 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1828 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1829
1830/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1831#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1832 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1833
1834/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1835 * clears flags. */
1836#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1837 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1838 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1839
1840#undef IEM_MC_IND_CALL_U16_AND_FINISH
1841
1842
1843/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1844#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1845 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1846
1847/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1848 * clears flags. */
1849#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1850 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1851 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1852
1853#undef IEM_MC_IND_CALL_U32_AND_FINISH
1854
1855
1856/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1857 * an extra parameter, for use in 64-bit code. */
1858#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1859 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1860
1861
1862/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1863 * an extra parameter, for use in 64-bit code and we need to check and clear
1864 * flags. */
1865#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1866 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1867 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1868
1869#undef IEM_MC_IND_CALL_U64_AND_FINISH
1870
1871/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1872 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1873DECL_INLINE_THROW(uint32_t)
1874iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1875 uint8_t idxInstr, uint8_t cbVar)
1876{
1877 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1878 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1879
1880 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1881 off = iemNativeRegFlushPendingWrites(pReNative, off);
1882
1883#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1884 Assert(pReNative->Core.offPc == 0);
1885 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1886#endif
1887
1888 /* Get a register with the new PC loaded from idxVarPc.
1889 Note! This ASSUMES that the high bits of the GPR is zeroed. */
1890 uint8_t const idxPcRegNew = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1891
1892 /* Check limit (may #GP(0) + exit TB). */
1893 if (!f64Bit)
1894/** @todo we can skip this test in FLAT 32-bit mode. */
1895 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1896 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1897 else if (cbVar > sizeof(uint32_t))
1898 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1899
1900#if 1
1901 /* Allocate a temporary PC register, we don't want it shadowed. */
1902 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1903 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1904#else
1905 /* Allocate a temporary PC register. */
1906 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1907 true /*fNoVolatileRegs*/);
1908#endif
1909
1910 /* Perform the addition and push the variable to the guest stack. */
1911 /** @todo Flat variants for PC32 variants. */
1912 switch (cbVar)
1913 {
1914 case sizeof(uint16_t):
1915 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1916 /* Truncate the result to 16-bit IP. */
1917 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1918 off = iemNativeEmitStackPushRip<16, 0>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1919 break;
1920 case sizeof(uint32_t):
1921 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1922 /** @todo In FLAT mode we can use the flat variant. */
1923 off = iemNativeEmitStackPushRip<32, 0>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1924 break;
1925 case sizeof(uint64_t):
1926 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1927 off = iemNativeEmitStackPushRip<64, 64>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1928 break;
1929 default:
1930 AssertFailed();
1931 }
1932
1933 /* RSP got changed, so do this again. */
1934 off = iemNativeRegFlushPendingWrites(pReNative, off);
1935
1936 /* Store the result. */
1937 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcRegNew);
1938#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1939 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1940 pReNative->Core.fDebugPcInitialized = true;
1941 Log4(("uPcUpdatingDebug=rip/indirect-call off=%#x\n", off));
1942#endif
1943
1944#if 1
1945 /* Need to transfer the shadow information to the new RIP register. */
1946 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1947#else
1948 /* Sync the new PC. */
1949 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxPcRegNew);
1950#endif
1951 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1952 iemNativeRegFreeTmp(pReNative, idxPcReg);
1953 /** @todo implictly free the variable? */
1954
1955 return off;
1956}
1957
1958
/*
 * Threaded variants of IEM_MC_REL_CALL_S16_AND_FINISH.
 *
 * Every plain variant expands to the same
 * iemNativeEmitRipRelativeCallS16NoFlags call, passing on the instruction
 * length and the 16-bit displacement.  The _WITH_FLAGS variants expand the
 * corresponding plain variant and then emit
 * iemNativeEmitFinishInstructionFlagsCheck.
 *
 * The expansions use 'pReNative', 'off' and 'pCallEntry' from the expanding
 * scope and assign the new code buffer offset to 'off'.
 */
1959/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1960 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1961#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1962 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1963
1964/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1965 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1966 * flags. */
1967#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1968 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1969 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1970
1971/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1972 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1973#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1974 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1975
1976/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1977 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1978 * flags. */
1979#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1980 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1981 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1982
1983/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1984 * an extra parameter, for use in 64-bit code. */
1985#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
1986 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1987
1988/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1989 * an extra parameter, for use in 64-bit code and we need to check and clear
1990 * flags. */
1991#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
1992 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
1993 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1994
1995#undef IEM_MC_REL_CALL_S16_AND_FINISH
1996
1997/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1998 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1999DECL_INLINE_THROW(uint32_t)
2000iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
2001 uint8_t idxInstr)
2002{
2003 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2004 off = iemNativeRegFlushPendingWrites(pReNative, off);
2005
2006#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2007 Assert(pReNative->Core.offPc == 0);
2008 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2009#endif
2010
2011 /* Allocate a temporary PC register. */
2012 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2013 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
2014 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2015
2016 /* Calculate the new RIP. */
2017 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2018 /* Truncate the result to 16-bit IP. */
2019 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
2020 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2021 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2022
2023 /* Truncate the result to 16-bit IP. */
2024 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
2025
2026 /* Check limit (may #GP(0) + exit TB). */
2027 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2028
2029 /* Perform the addition and push the variable to the guest stack. */
2030 off = iemNativeEmitStackPushRip<16, 0>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
2031
2032 /* RSP got changed, so flush again. */
2033 off = iemNativeRegFlushPendingWrites(pReNative, off);
2034
2035 /* Store the result. */
2036 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcRegNew);
2037#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2038 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2039 pReNative->Core.fDebugPcInitialized = true;
2040 Log4(("uPcUpdatingDebug=rip/rel-call-16 off=%#x offDisp=%d\n", off, offDisp));
2041#endif
2042
2043 /* Need to transfer the shadow information to the new RIP register. */
2044 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2045 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2046 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2047
2048 return off;
2049}
2050
2051
2052/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2053 * an extra parameter, for use in 16-bit and 32-bit code on 386+.
 *
 * Expands to an assignment to 'off' and references 'pReNative', 'off' and
 * 'pCallEntry' from the expansion site. */
2054#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
2055 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
2056
2057/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2058 * an extra parameter, for use in 16-bit and 32-bit code on 386+ when we also
2059 * need to check and clear flags (done by the trailing
 * iemNativeEmitFinishInstructionFlagsCheck call). */
2060#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
2061 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
2062 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2063
/* The non-threaded base macro is removed here; only the _THREADED_ variants above remain defined. */
2064#undef IEM_MC_REL_CALL_S32_AND_FINISH
2065
2066/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2067 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2068DECL_INLINE_THROW(uint32_t)
2069iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
2070 uint8_t idxInstr)
2071{
2072 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2073 off = iemNativeRegFlushPendingWrites(pReNative, off);
2074
2075#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2076 Assert(pReNative->Core.offPc == 0);
2077 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2078#endif
2079
2080 /* Allocate a temporary PC register. */
2081 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2082 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2083 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2084
2085 /* Update the EIP to get the return address. */
2086 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2087
2088 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2089 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2090 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2091 /** @todo we can skip this test in FLAT 32-bit mode. */
2092 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2093
2094 /* Perform Perform the return address to the guest stack. */
2095 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
2096 off = iemNativeEmitStackPushRip<32, 0>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
2097
2098 /* RSP got changed, so do this again. */
2099 off = iemNativeRegFlushPendingWrites(pReNative, off);
2100
2101 /* Store the result. */
2102 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcRegNew);
2103#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2104 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2105 pReNative->Core.fDebugPcInitialized = true;
2106 Log4(("uPcUpdatingDebug=eip/rel-call-32 off=%#x offDisp=%d\n", off, offDisp));
2107#endif
2108
2109 /* Need to transfer the shadow information to the new RIP register. */
2110 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2111 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2112 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2113
2114 return off;
2115}
2116
2117
2118/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2119 * an extra parameter, for use in 64-bit code.
 *
 * Expands to an assignment to 'off' and references 'pReNative', 'off' and
 * 'pCallEntry' from the expansion site. */
2120#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
2121 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
2122
2123/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2124 * an extra parameter, for use in 64-bit code when we also need to check and
2125 * clear flags (done by the trailing iemNativeEmitFinishInstructionFlagsCheck
 * call). */
2126#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
2127 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
2128 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2129
/* The non-threaded base macro is removed here; only the _THREADED_ variants above remain defined. */
2130#undef IEM_MC_REL_CALL_S64_AND_FINISH
2131
2132/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2133 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2134DECL_INLINE_THROW(uint32_t)
2135iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
2136 uint8_t idxInstr)
2137{
2138 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2139 off = iemNativeRegFlushPendingWrites(pReNative, off);
2140
2141#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2142 Assert(pReNative->Core.offPc == 0);
2143 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2144#endif
2145
2146 /* Allocate a temporary PC register. */
2147 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2148 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2149 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2150
2151 /* Update the RIP to get the return address. */
2152 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
2153
2154 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2155 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
2156 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
2157 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2158
2159 /* Perform Perform the return address to the guest stack. */
2160 off = iemNativeEmitStackPushRip<64, 64>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
2161
2162 /* RSP got changed, so do this again. */
2163 off = iemNativeRegFlushPendingWrites(pReNative, off);
2164
2165 /* Store the result. */
2166 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcRegNew);
2167#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2168 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2169 pReNative->Core.fDebugPcInitialized = true;
2170 Log4(("uPcUpdatingDebug=rip/rel-call-64 off=%#x offDisp=%ld\n", off, offDisp));
2171#endif
2172
2173 /* Need to transfer the shadow information to the new RIP register. */
2174 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2175 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2176 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2177
2178 return off;
2179}
2180
2181
2182/*********************************************************************************************************************************
2183* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters). *
2184*********************************************************************************************************************************/
2185
2186DECL_FORCE_INLINE_THROW(uint32_t)
2187iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2188 uint16_t cbPopAdd, uint8_t idxRegTmp)
2189{
2190 /* Use16BitSp: */
2191#ifdef RT_ARCH_AMD64
2192 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2193 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
2194 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
2195 RT_NOREF(idxRegTmp);
2196
2197#elif defined(RT_ARCH_ARM64)
2198 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
2199 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
2200 /* add tmp, regrsp, #cbMem */
2201 uint16_t const cbCombined = cbMem + cbPopAdd;
2202 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
2203 if (cbCombined >= RT_BIT_32(12))
2204 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
2205 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2206 /* and tmp, tmp, #0xffff */
2207 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2208 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
2209 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
2210 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
2211
2212#else
2213# error "Port me"
2214#endif
2215 return off;
2216}
2217
2218
2219DECL_FORCE_INLINE_THROW(uint32_t)
2220iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2221 uint16_t cbPopAdd)
2222{
2223 /* Use32BitSp: */
2224 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2225 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
2226 return off;
2227}
2228
2229
/* Note: Each macro in this group expands to statement(s) assigning to 'off' and
 *       references 'pReNative', 'off' and 'pCallEntry' from the expansion site.
 *       The second iemNativeEmitRetn template argument (a_f64Bit) is true only
 *       for the PC64 variants. */

2230/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets.
 * Always uses the 16-bit operand size instantiation. */
2231#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_cbPopArgs, a_cbInstr) \
2232 off = iemNativeEmitRetn<IEMMODE_16BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2233
2234/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets.
 * Expands to two statements: an assertion that a_enmEffOpSize is either
 * IEMMODE_32BIT or IEMMODE_16BIT, and the emitter call instantiated for the
 * given operand size. */
2235#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2236 Assert((a_enmEffOpSize) == IEMMODE_32BIT || (a_enmEffOpSize) == IEMMODE_16BIT); \
2237 off = (a_enmEffOpSize) == IEMMODE_32BIT \
2238 ? iemNativeEmitRetn<IEMMODE_32BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr) \
2239 : iemNativeEmitRetn<IEMMODE_16BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2240
2241/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code.
 * Expands to two statements: an assertion that a_enmEffOpSize is either
 * IEMMODE_64BIT or IEMMODE_16BIT, and the emitter call instantiated for the
 * given operand size. */
2242#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2243 Assert((a_enmEffOpSize) == IEMMODE_64BIT || (a_enmEffOpSize) == IEMMODE_16BIT); \
2244 off = (a_enmEffOpSize) == IEMMODE_64BIT \
2245 ? iemNativeEmitRetn<IEMMODE_64BIT, true>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr) \
2246 : iemNativeEmitRetn<IEMMODE_16BIT, true>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2247
2248/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
2249 * clears flags. */
2250#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbPopArgs, a_cbInstr) \
2251 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_cbPopArgs, a_cbInstr); \
2252 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2253
2254/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
2255 * clears flags. */
2256#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2257 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_cbPopArgs, a_cbInstr, a_enmEffOpSize); \
2258 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2259
2260/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
2261 * clears flags. */
2262#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2263 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_cbPopArgs, a_cbInstr, a_enmEffOpSize); \
2264 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2265
2266/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
2267template<IEMMODE const a_enmEffOpSize, bool const a_f64Bit>
2268DECL_INLINE_THROW(uint32_t)
2269iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPopArgs, uint8_t idxInstr)
2270{
2271 RT_NOREF(cbInstr);
2272 AssertCompile(a_enmEffOpSize == IEMMODE_64BIT || a_enmEffOpSize == IEMMODE_32BIT || a_enmEffOpSize == IEMMODE_16BIT);
2273
2274#ifdef VBOX_STRICT
2275 /*
2276 * Check that the fExec flags we've got make sense.
2277 */
2278 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
2279#endif
2280
2281 /*
2282 * To keep things simple we have to commit any pending writes first as we
2283 * may end up making calls.
2284 */
2285 off = iemNativeRegFlushPendingWrites(pReNative, off);
2286
2287 /*
2288 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
2289 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
2290 * directly as the effective stack pointer.
2291 *
2292 * (Code structure is very similar to that of PUSH)
2293 *
2294 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
2295 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
2296 * aren't commonly used (or useful) and thus not in need of optimizing.
2297 *
2298 * Note! For non-flat modes the guest RSP is not allocated for update but
2299 * rather for calculation as the shadowed register would remain modified
2300 * even if the return address throws a #GP(0) due to being outside the
2301 * CS limit causing a wrong stack pointer value in the guest (see the
2302 * near return testcase in bs3-cpu-basic-2). If no exception is thrown
2303 * the shadowing is transfered to the new register returned by
2304 * iemNativeRegAllocTmpForGuestReg() at the end.
2305 */
2306 RT_CONSTEXPR
2307 uint8_t const cbMem = a_enmEffOpSize == IEMMODE_64BIT
2308 ? sizeof(uint64_t)
2309 : a_enmEffOpSize == IEMMODE_32BIT
2310 ? sizeof(uint32_t)
2311 : sizeof(uint16_t);
2312/** @todo the basic flatness could be detected by the threaded compiler step
2313 * like for the other macros... worth it? */
2314 bool const fFlat = a_enmEffOpSize == IEMMODE_64BIT
2315 || (a_enmEffOpSize == IEMMODE_32BIT /* see note */ && IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
2316 uintptr_t const pfnFunction = a_enmEffOpSize == IEMMODE_64BIT
2317 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
2318 : fFlat
2319 ? (uintptr_t)iemNativeHlpStackFlatFetchU32
2320 : a_enmEffOpSize == IEMMODE_32BIT
2321 ? (uintptr_t)iemNativeHlpStackFetchU32
2322 : (uintptr_t)iemNativeHlpStackFetchU16;
2323 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
2324 fFlat ? kIemNativeGstRegUse_ForUpdate
2325 : kIemNativeGstRegUse_Calculation,
2326 true /*fNoVolatileRegs*/);
2327 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
2328 /** @todo can do a better job picking the register here. For cbMem >= 4 this
2329 * will be the resulting register value. */
2330 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
2331
2332 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
2333 if (fFlat)
2334 Assert(idxRegEffSp == idxRegRsp);
2335 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
2336 {
2337 Assert(idxRegEffSp != idxRegRsp);
2338 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
2339 kIemNativeGstRegUse_ReadOnly);
2340#ifdef RT_ARCH_AMD64
2341 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2342#else
2343 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2344#endif
2345 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
2346 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
2347 offFixupJumpToUseOtherBitSp = off;
2348 if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_32BIT)
2349 {
2350 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
2351 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs);
2352 }
2353 else
2354 {
2355 Assert(a_enmEffOpSize == IEMMODE_16BIT);
2356 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
2357 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs,
2358 idxRegMemResult);
2359 }
2360 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2361 }
2362 /* SpUpdateEnd: */
2363 uint32_t const offLabelSpUpdateEnd = off;
2364
2365 /*
2366 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
2367 * we're skipping lookup).
2368 */
2369 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
2370 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
2371 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
2372 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
2373 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
2374 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
2375 : UINT32_MAX;
2376
2377 if (!TlbState.fSkip)
2378 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
2379 else
2380 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
2381
2382 /*
2383 * Use16BitSp:
2384 */
2385 if (!fFlat)
2386 {
2387#ifdef RT_ARCH_AMD64
2388 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2389#else
2390 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2391#endif
2392 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
2393 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
2394 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs,
2395 idxRegMemResult);
2396 else
2397 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs);
2398 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
2399 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2400 }
2401
2402 /*
2403 * TlbMiss:
2404 *
2405 * Call helper to do the pushing.
2406 */
2407 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
2408
2409#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2410 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2411#else
2412 RT_NOREF(idxInstr);
2413#endif
2414
2415 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
2416 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
2417 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
2418 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
2419
2420
2421 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
2422 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
2423 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
2424
2425#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2426 /* Do delayed EFLAGS calculations. */
2427 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
2428#endif
2429
2430 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
2431 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2432
2433 /* Done setting up parameters, make the call. */
2434 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
2435
2436 /* Move the return register content to idxRegMemResult. */
2437 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
2438 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
2439
2440 /* Restore variables and guest shadow registers to volatile registers. */
2441 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
2442 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
2443
2444#ifdef IEMNATIVE_WITH_TLB_LOOKUP
2445 if (!TlbState.fSkip)
2446 {
2447 /* end of TlbMiss - Jump to the done label. */
2448 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
2449 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
2450
2451 /*
2452 * TlbLookup:
2453 */
2454 off = iemNativeEmitTlbLookup<true, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ>(pReNative, off, &TlbState, iSegReg,
2455 idxLabelTlbLookup, idxLabelTlbMiss,
2456 idxRegMemResult);
2457
2458 /*
2459 * Emit code to load the value (from idxRegMemResult into idxRegMemResult).
2460 */
2461 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2462# ifdef IEM_WITH_TLB_STATISTICS
2463 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
2464 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
2465# endif
2466 switch (cbMem)
2467 {
2468 case 2:
2469 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2470 break;
2471 case 4:
2472 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2473 break;
2474 case 8:
2475 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2476 break;
2477 default:
2478 AssertFailed();
2479 }
2480
2481 TlbState.freeRegsAndReleaseVars(pReNative);
2482
2483 /*
2484 * TlbDone:
2485 *
2486 * Set the new RSP value (FLAT accesses needs to calculate it first) and
2487 * commit the popped register value.
2488 */
2489 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
2490 }
2491#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
2492
2493 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
2494 if RT_CONSTEXPR_IF(!a_f64Bit)
2495/** @todo we can skip this test in FLAT 32-bit mode. */
2496 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2497 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2498 else if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_64BIT)
2499 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2500
2501 /* Complete RSP calculation for FLAT mode. */
2502 if (idxRegEffSp == idxRegRsp)
2503 {
2504 if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_64BIT)
2505 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPopArgs);
2506 else
2507 {
2508 Assert(a_enmEffOpSize == IEMMODE_32BIT);
2509 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPopArgs);
2510 }
2511 }
2512
2513 /* Commit the result and clear any current guest shadows for RIP. */
2514 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Rsp>(pReNative, off, idxRegRsp);
2515 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>( pReNative, off, idxRegMemResult);
2516 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
2517#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2518 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2519 pReNative->Core.fDebugPcInitialized = true;
2520 Log4(("uPcUpdatingDebug=rip/ret off=%#x\n", off));
2521#endif
2522
2523 /* Need to transfer the shadowing information to the host register containing the updated value now. */
2524 if (!fFlat)
2525 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
2526
2527 iemNativeRegFreeTmp(pReNative, idxRegRsp);
2528 if (idxRegEffSp != idxRegRsp)
2529 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
2530 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
2531 return off;
2532}
2533
2534
2535/*********************************************************************************************************************************
2536* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
2537*********************************************************************************************************************************/
2538
2539#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
2540 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2541
2542/**
2543 * Emits code to check if a \#NM exception should be raised.
2544 *
2545 * @returns New code buffer offset, UINT32_MAX on failure.
2546 * @param pReNative The native recompile state.
2547 * @param off The code buffer offset.
2548 * @param idxInstr The current instruction.
2549 */
2550DECL_INLINE_THROW(uint32_t)
2551iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2552{
2553 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
2554
2555 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
2556 {
2557 /*
2558 * Make sure we don't have any outstanding guest register writes as we may
2559 * raise an #NM and all guest register must be up to date in CPUMCTX.
2560 */
2561 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2562 off = iemNativeRegFlushPendingWrites(pReNative, off);
2563
2564#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2565 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2566#else
2567 RT_NOREF(idxInstr);
2568#endif
2569
2570 /* Allocate a temporary CR0 register. */
2571 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2572 kIemNativeGstRegUse_ReadOnly);
2573
2574 /*
2575 * if (cr0 & (X86_CR0_EM | X86_CR0_TS) != 0)
2576 * return raisexcpt();
2577 */
2578 /* Test and jump. */
2579 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseNm>(pReNative, off, idxCr0Reg,
2580 X86_CR0_EM | X86_CR0_TS);
2581
2582 /* Free but don't flush the CR0 register. */
2583 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2584
2585 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
2586 }
2587 else
2588 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
2589
2590 return off;
2591}
2592
2593
2594#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
2595 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2596
2597/**
2598 * Emits code to check if a \#NM exception should be raised.
2599 *
2600 * @returns New code buffer offset, UINT32_MAX on failure.
2601 * @param pReNative The native recompile state.
2602 * @param off The code buffer offset.
2603 * @param idxInstr The current instruction.
2604 */
2605DECL_INLINE_THROW(uint32_t)
2606iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2607{
2608 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2609
2610 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2611 {
2612 /*
2613 * Make sure we don't have any outstanding guest register writes as we may
2614 * raise an #NM and all guest register must be up to date in CPUMCTX.
2615 */
2616 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2617 off = iemNativeRegFlushPendingWrites(pReNative, off);
2618
2619#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2620 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2621#else
2622 RT_NOREF(idxInstr);
2623#endif
2624
2625 /* Allocate a temporary CR0 register. */
2626 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2627 kIemNativeGstRegUse_Calculation);
2628
2629 /*
2630 * if (cr0 & (X86_CR0_MP | X86_CR0_TS) == (X86_CR0_MP | X86_CR0_TS))
2631 * return raisexcpt();
2632 */
2633 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2634 /* Test and jump. */
2635 off = iemNativeEmitTbExitIfGpr32EqualsImm<kIemNativeLabelType_RaiseNm>(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2636
2637 /* Free the CR0 register. */
2638 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2639
2640 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2641 }
2642 else
2643 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2644
2645 return off;
2646}
2647
2648
2649#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2650 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2651
2652/**
2653 * Emits code to check if a \#MF exception should be raised.
2654 *
2655 * @returns New code buffer offset, UINT32_MAX on failure.
2656 * @param pReNative The native recompile state.
2657 * @param off The code buffer offset.
2658 * @param idxInstr The current instruction.
2659 */
2660DECL_INLINE_THROW(uint32_t)
2661iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2662{
2663 /*
2664 * Make sure we don't have any outstanding guest register writes as we may
2665 * raise an #MF and all guest register must be up to date in CPUMCTX.
2666 */
2667 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2668 off = iemNativeRegFlushPendingWrites(pReNative, off);
2669
2670#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2671 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2672#else
2673 RT_NOREF(idxInstr);
2674#endif
2675
2676 /* Allocate a temporary FSW register. */
2677 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2678 kIemNativeGstRegUse_ReadOnly);
2679
2680 /*
2681 * if (FSW & X86_FSW_ES != 0)
2682 * return raisexcpt();
2683 */
2684 /* Test and jump. */
2685 off = iemNativeEmitTbExitIfBitSetInGpr<kIemNativeLabelType_RaiseMf>(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT);
2686
2687 /* Free but don't flush the FSW register. */
2688 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2689
2690 return off;
2691}
2692
2693
2694#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2695 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2696
2697/**
2698 * Emits code to check if a SSE exception (either \#UD or \#NM) should be raised.
2699 *
2700 * @returns New code buffer offset, UINT32_MAX on failure.
2701 * @param pReNative The native recompile state.
2702 * @param off The code buffer offset.
2703 * @param idxInstr The current instruction.
2704 */
2705DECL_INLINE_THROW(uint32_t)
2706iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2707{
2708 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2709
2710 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2711 {
2712 /*
2713 * Make sure we don't have any outstanding guest register writes as we may
2714 * raise an \#UD or \#NM and all guest register must be up to date in CPUMCTX.
2715 */
2716 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2717 off = iemNativeRegFlushPendingWrites(pReNative, off);
2718
2719#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2720 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2721#else
2722 RT_NOREF(idxInstr);
2723#endif
2724
2725 /* Allocate a temporary CR0 and CR4 register. */
2726 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2727 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2728 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2729
2730 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2731#ifdef RT_ARCH_AMD64
2732 /*
2733 * We do a modified test here:
2734 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
2735 * else { goto RaiseSseRelated; }
2736 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2737 * all targets except the 386, which doesn't support SSE, this should
2738 * be a safe assumption.
2739 */
2740 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2741 1+6+3+3+7+7+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2742 //pCodeBuf[off++] = 0xcc;
2743 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2744 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2745 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2746 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2747 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2748 off = iemNativeEmitTbExitJccEx<kIemNativeLabelType_RaiseSseRelated>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
2749
2750#elif defined(RT_ARCH_ARM64)
2751 /*
2752 * We do a modified test here:
2753 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2754 * else { goto RaiseSseRelated; }
2755 */
2756 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2757 1+5 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2758 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2759 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2760 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2761 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2762 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2763 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2764 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2765 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2766 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_RaiseSseRelated>(pReNative, pCodeBuf, off,
2767 idxTmpReg, false /*f64Bit*/);
2768
2769#else
2770# error "Port me!"
2771#endif
2772
2773 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2774 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2775 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2776 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2777
2778 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2779 }
2780 else
2781 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2782
2783 return off;
2784}
2785
2786
2787#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2788 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2789
2790/**
2791 * Emits code to check if a AVX exception (either \#UD or \#NM) should be raised.
2792 *
2793 * @returns New code buffer offset, UINT32_MAX on failure.
2794 * @param pReNative The native recompile state.
2795 * @param off The code buffer offset.
2796 * @param idxInstr The current instruction.
2797 */
2798DECL_INLINE_THROW(uint32_t)
2799iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2800{
2801 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2802
2803 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2804 {
2805 /*
2806 * Make sure we don't have any outstanding guest register writes as we may
2807 * raise an \#UD or \#NM and all guest register must be up to date in CPUMCTX.
2808 */
2809 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2810 off = iemNativeRegFlushPendingWrites(pReNative, off);
2811
2812#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2813 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2814#else
2815 RT_NOREF(idxInstr);
2816#endif
2817
2818 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2819 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2820 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2821 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2822 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2823
2824 /*
2825 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2826 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2827 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2828 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2829 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2830 * { likely }
2831 * else { goto RaiseAvxRelated; }
2832 */
2833#ifdef RT_ARCH_AMD64
2834 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2835 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
2836 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2837 ^ 0x1a) ) { likely }
2838 else { goto RaiseAvxRelated; } */
2839 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2840 1+6+3+5+3+5+3+7+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2841 //pCodeBuf[off++] = 0xcc;
2842 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2843 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2844 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2845 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2846 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2847 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2848 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2849 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2850 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2851 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2852 off = iemNativeEmitTbExitJccEx<kIemNativeLabelType_RaiseAvxRelated>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
2853
2854#elif defined(RT_ARCH_ARM64)
2855 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
2856 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2857 else { goto RaiseAvxRelated; } */
2858 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2859 1+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2860 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2861 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2862 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2863 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2864 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2865 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2866 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2867 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2868 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2869 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2870 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2871 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_RaiseAvxRelated>(pReNative, pCodeBuf, off,
2872 idxTmpReg, false /*f64Bit*/);
2873
2874#else
2875# error "Port me!"
2876#endif
2877
2878 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2879 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2880 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2881 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2882
2883 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2884 }
2885 else
2886 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2887
2888 return off;
2889}
2890
2891
2892#define IEM_MC_RAISE_DIVIDE_ERROR_IF_LOCAL_IS_ZERO(a_uVar) \
2893 off = iemNativeEmitRaiseDivideErrorIfLocalIsZero(pReNative, off, a_uVar, pCallEntry->idxInstr)
2894
2895/**
2896 * Emits code to raise a \#DE if a local variable is zero.
2897 *
2898 * @returns New code buffer offset, UINT32_MAX on failure.
2899 * @param pReNative The native recompile state.
2900 * @param off The code buffer offset.
2901 * @param idxVar The variable to check. This must be 32-bit (EFLAGS).
2902 * @param idxInstr The current instruction.
2903 */
2904DECL_INLINE_THROW(uint32_t)
2905iemNativeEmitRaiseDivideErrorIfLocalIsZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxInstr)
2906{
2907 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2908 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, sizeof(uint32_t));
2909
2910 /* Make sure we don't have any outstanding guest register writes as we may. */
2911 off = iemNativeRegFlushPendingWrites(pReNative, off);
2912
2913 /* Set the instruction number if we're counting. */
2914#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2915 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2916#else
2917 RT_NOREF(idxInstr);
2918#endif
2919
2920 /* Do the job we're here for. */
2921 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off);
2922 off = iemNativeEmitTbExitIfGprIsZero<kIemNativeLabelType_RaiseDe>(pReNative, off, idxVarReg, false /*f64Bit*/);
2923 iemNativeVarRegisterRelease(pReNative, idxVar);
2924
2925 return off;
2926}
2927
2928
2929#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2930 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2931
2932/**
2933 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2934 *
2935 * @returns New code buffer offset, UINT32_MAX on failure.
2936 * @param pReNative The native recompile state.
2937 * @param off The code buffer offset.
2938 * @param idxInstr The current instruction.
2939 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2940 * @param cbAlign The alignment in bytes to check against.
2941 */
2942DECL_INLINE_THROW(uint32_t)
2943iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2944 uint8_t idxVarEffAddr, uint8_t cbAlign)
2945{
2946 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2947 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2948
2949 /*
2950 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2951 */
2952 off = iemNativeRegFlushPendingWrites(pReNative, off);
2953
2954#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2955 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2956#else
2957 RT_NOREF(idxInstr);
2958#endif
2959
2960 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2961 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseGp0>(pReNative, off, idxVarReg, cbAlign - 1);
2962 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2963
2964 return off;
2965}
2966
2967
2968/*********************************************************************************************************************************
2969* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2970*********************************************************************************************************************************/
2971
2972/**
2973 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2974 *
2975 * @returns Pointer to the condition stack entry on success, NULL on failure
2976 * (too many nestings)
2977 */
2978DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
2979{
2980 uint32_t const idxStack = pReNative->cCondDepth;
2981 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
2982
2983 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
2984 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
2985
2986 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
2987 pEntry->fInElse = false;
2988 pEntry->fIfExitTb = false;
2989 pEntry->fElseExitTb = false;
2990 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
2991 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
2992
2993 return pEntry;
2994}
2995
2996
2997/**
2998 * Start of the if-block, snapshotting the register and variable state.
2999 */
3000DECL_INLINE_THROW(void)
3001iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
3002{
3003 Assert(offIfBlock != UINT32_MAX);
3004 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3005 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3006 Assert(!pEntry->fInElse);
3007
3008 /* Define the start of the IF block if request or for disassembly purposes. */
3009 if (idxLabelIf != UINT32_MAX)
3010 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
3011#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3012 else
3013 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
3014#else
3015 RT_NOREF(offIfBlock);
3016#endif
3017
3018 /* Copy the initial state so we can restore it in the 'else' block. */
3019 pEntry->InitialState = pReNative->Core;
3020}
3021
3022
3023#define IEM_MC_ELSE() } while (0); \
3024 off = iemNativeEmitElse(pReNative, off); \
3025 do {
3026
3027/** Emits code related to IEM_MC_ELSE. */
3028DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3029{
3030 /* Check sanity and get the conditional stack entry. */
3031 Assert(off != UINT32_MAX);
3032 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3033 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3034 Assert(!pEntry->fInElse);
3035
3036 /* We can skip dirty register flushing and the dirty register flushing if
3037 the branch already jumped to a TB exit. */
3038 if (!pEntry->fIfExitTb)
3039 {
3040#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) && 0
3041 /* Writeback any dirty shadow registers. */
3042 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
3043 * in one of the branches and leave guest registers already dirty before the start of the if
3044 * block alone. */
3045 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3046#endif
3047
3048 /* Jump to the endif. */
3049 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
3050 }
3051# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3052 else
3053 Assert(pReNative->Core.offPc == 0);
3054# endif
3055
3056 /* Define the else label and enter the else part of the condition. */
3057 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3058 pEntry->fInElse = true;
3059
3060 /* Snapshot the core state so we can do a merge at the endif and restore
3061 the snapshot we took at the start of the if-block. */
3062 pEntry->IfFinalState = pReNative->Core;
3063 pReNative->Core = pEntry->InitialState;
3064
3065 return off;
3066}
3067
3068
3069#define IEM_MC_ENDIF() } while (0); \
3070 off = iemNativeEmitEndIf(pReNative, off)
3071
3072/** Emits code related to IEM_MC_ENDIF.
 *
 * Closes the innermost conditional on the condition stack:
 *  - If one of the branches is flagged as having exited the TB, no merging
 *    is done: when it was the else-branch the final state of the if-branch is
 *    restored, otherwise the current core state is kept as it is.
 *  - Otherwise the current core state is reconciled with the state of the
 *    other branch (IfFinalState when in the else part, InitialState when
 *    there was no else part): guest register shadows the two states disagree
 *    on are dropped, dirty guest registers are flushed as needed, and
 *    variables / host register assignments that differ are dropped.  If the
 *    states cannot be reconciled, IEMNATIVE_DO_LONGJMP is invoked with
 *    VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED.
 *
 * Finally the endif label (and the else label, if there was no else part) is
 * defined and the condition stack entry is popped.
 *
 * @returns New code buffer offset.
 * @param   pReNative   The native recompile state.
 * @param   off         The code buffer offset.
 */
3073DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3074{
3075 /* Check sanity and get the conditional stack entry. */
3076 Assert(off != UINT32_MAX);
3077 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3078 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3079
3080#if defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) && 0
3081 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3082#endif
3083
3084 /*
3085 * If either of the branches exited the TB, we can take the state from the
3086 * other branch and skip all the merging headache.
3087 */
3088 bool fDefinedLabels = false;
3089 if (pEntry->fElseExitTb || pEntry->fIfExitTb)
3090 {
3091#ifdef VBOX_STRICT
3092 Assert(pReNative->cCondDepth == 1); /* Assuming this only happens in simple conditional structures. */
3093 Assert(pEntry->fElseExitTb != pEntry->fIfExitTb); /* Assuming we don't have any code where both branches exits. */
3094 PCIEMNATIVECORESTATE const pExitCoreState = pEntry->fIfExitTb && pEntry->fInElse
3095 ? &pEntry->IfFinalState : &pReNative->Core;
3096# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3097 Assert(pExitCoreState->bmGstRegShadowDirty == 0);
3098# endif
3099# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3100 Assert(pExitCoreState->offPc == 0);
3101# endif
3102 RT_NOREF(pExitCoreState);
3103#endif
3104
3105 if (!pEntry->fIfExitTb)
3106 {
3107 Assert(pEntry->fInElse);
3108 pReNative->Core = pEntry->IfFinalState;
3109 }
3110 }
3111 else
3112 {
3113 /*
3114 * Now we have to find common ground with the core state at the end of the
3115 * if-final. Use the smallest common denominator and just drop anything
3116 * that isn't the same in both states.
3117 */
3118 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
3119 * which is why we're doing this at the end of the else-block.
3120 * But we'd need more info about future for that to be worth the effort. */
3121 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
3122#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3123 AssertMsgStmt(pReNative->Core.offPc == pOther->offPc,
3124 ("Core.offPc=%#RX64 pOther->offPc=%#RX64\n", pReNative->Core.offPc, pOther->offPc),
3125 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3126#endif
3127
3128 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
3129 {
3130#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3131 /*
3132 * If the branch has differences in dirty shadow registers, we will flush
3133 * the register only dirty in the current branch and dirty any that's only
3134 * dirty in the other one.
3135 */
3136 uint64_t const fGstRegDirtyOther = pOther->bmGstRegShadowDirty;
3137 uint64_t const fGstRegDirtyThis = pReNative->Core.bmGstRegShadowDirty;
3138 uint64_t const fGstRegDirtyDiff = fGstRegDirtyOther ^ fGstRegDirtyThis;
3139 uint64_t const fGstRegDirtyHead = fGstRegDirtyThis & fGstRegDirtyDiff;
3140 uint64_t fGstRegDirtyTail = fGstRegDirtyOther & fGstRegDirtyDiff;
3141 if (!fGstRegDirtyDiff)
3142 { /* likely */ }
3143 else
3144 {
3145 //uint64_t const fGstRegDirtyHead = pReNative->Core.bmGstRegShadowDirty & fGstRegDirtyDiff;
3146 if (fGstRegDirtyHead)
3147 {
3148 Log12(("iemNativeEmitEndIf: flushing dirty guest registers in current branch: %RX64\n", fGstRegDirtyHead));
3149 off = iemNativeRegFlushDirtyGuest(pReNative, off, fGstRegDirtyHead);
3150 }
3151 }
3152#endif
3153
3154 /*
3155 * Shadowed guest registers.
3156 *
3157 * We drop any shadows where the two states disagree about where
3158 * things are kept. We may end up flushing more dirty registers
3159 * here, if the two branches keeps things in different registers.
3160 */
3161 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
3162 if (fGstRegs)
3163 {
3164 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
3165 do
3166 {
3167 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3168 fGstRegs &= ~RT_BIT_64(idxGstReg);
3169
3170 uint8_t const idxCurHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3171 uint8_t const idxOtherHstReg = pOther->aidxGstRegShadows[idxGstReg];
3172 if ( idxCurHstReg != idxOtherHstReg
3173 || !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg)))
3174 {
3175#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3176 Log12(("iemNativeEmitEndIf: dropping gst %s (%d) from hst %s (other %d/%#RX64)\n",
3177 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3178 idxOtherHstReg, pOther->bmGstRegShadows));
3179#else
3180 Log12(("iemNativeEmitEndIf: dropping %s gst %s (%d) from hst %s (other %d/%#RX64/%s)\n",
3181 pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "_dirty_" : "clean",
3182 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3183 idxOtherHstReg, pOther->bmGstRegShadows,
3184 pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "dirty" : "clean"));
3185 if (pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3186 fGstRegDirtyTail |= RT_BIT_64(idxGstReg);
3187 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3188 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3189#endif
3190 iemNativeRegClearGstRegShadowingOne(pReNative, idxCurHstReg, (IEMNATIVEGSTREG)idxGstReg, off);
3191 }
3192 } while (fGstRegs);
3193 }
3194 else
3195 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
3196
3197#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3198 /*
3199 * Generate jumpy code for flushing dirty registers from the other
3200 * branch that aren't dirty in the current one.
3201 */
3202 if (!fGstRegDirtyTail)
3203 { /* likely */ }
3204 else
3205 {
3206 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEndIfOtherBranchDirty);
3207 Log12(("iemNativeEmitEndIf: Dirty register only in the other branch: %#RX64 - BAD!\n", fGstRegDirtyTail));
3208
3209 /* First the current branch has to jump over the dirty flushing from the other branch. */
3210 uint32_t const offFixup1 = off;
3211 off = iemNativeEmitJmpToFixed(pReNative, off, off + 10);
3212
3213 /* Put the endif and maybe else label here so the other branch ends up here. */
3214 if (!pEntry->fInElse)
3215 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3216 else
3217 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3218 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3219 fDefinedLabels = true;
3220
3221 /* Flush the dirty guest registers from the other branch. */
3222 while (fGstRegDirtyTail)
3223 {
3224 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegDirtyTail) - 1;
3225 fGstRegDirtyTail &= ~RT_BIT_64(idxGstReg);
3226 Log12(("iemNativeEmitEndIf: tail flushing %s (%d) from other branch %d (cur %d/%#RX64)\n",
3227 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, pOther->aidxGstRegShadows[idxGstReg],
3228 pReNative->Core.aidxGstRegShadows[idxGstReg], pReNative->Core.bmGstRegShadows));
3229
3230 off = iemNativeRegFlushPendingWriteEx(pReNative, off, (PIEMNATIVECORESTATE)pOther, (IEMNATIVEGSTREG)idxGstReg);
3231
3232 /* Mismatching shadowing should've been dropped in the previous step already. */
3233 Assert( !(pReNative->Core.bmGstRegShadows & RT_BIT_64(idxGstReg))
3234 || pReNative->Core.aidxGstRegShadows[idxGstReg] == pOther->aidxGstRegShadows[idxGstReg]);
3235 }
3236
3237 /* Here is the actual endif label, fixup the above jump to land here. */
3238 iemNativeFixupFixedJump(pReNative, offFixup1, off);
3239 }
3240#endif
3241
3242 /*
3243 * Check variables next. For now we must require them to be identical
3244 * or stuff we can recreate. (No code is emitted here.)
3245 */
3246 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
3247#ifdef VBOX_STRICT
3248 uint32_t const offAssert = off;
3249#endif
3250 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
3251 if (fVars)
3252 {
3253 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
3254 do
3255 {
3256 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
3257 fVars &= ~RT_BIT_32(idxVar);
3258
3259 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
3260 {
3261 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
3262 continue;
3263 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3264 {
3265 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3266 if (idxHstReg != UINT8_MAX)
3267 {
3268 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3269 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3270 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
3271 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3272 }
3273 continue;
3274 }
3275 }
3276 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
3277 continue;
3278
3279 /* Irreconcilable, so drop it. */
3280 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3281 if (idxHstReg != UINT8_MAX)
3282 {
3283 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3284 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3285 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
3286 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3287 }
3288 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3289 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
3290 } while (fVars);
3291 }
3292 Assert(off == offAssert);
3293
3294 /*
3295 * Finally, check that the host register allocations matches.
3296 */
3297 AssertMsgStmt((pReNative->Core.bmHstRegs & (pReNative->Core.bmHstRegs ^ pOther->bmHstRegs)) == 0,
3298 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
3299 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
3300 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3301 }
3302 }
3303
3304 /*
3305 * Define the endif label and maybe the else one if we're still in the 'if' part.
3306 */
3307 if (!fDefinedLabels)
3308 {
3309 if (!pEntry->fInElse)
3310 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3311 else
3312 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3313 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3314 }
3315
3316 /* Pop the conditional stack.*/
3317 pReNative->cCondDepth -= 1;
3318
3319 return off;
3320}
3321
3322
3323/**
3324 * Helper function to convert X86_EFL_xxx masks to liveness masks.
3325 *
3326 * The compiler should be able to figure this out at compile time, so sprinkling
3327 * constexpr where ever possible here to nudge it along.
3328 */
3329template<uint32_t const a_fEfl>
3330RT_CONSTEXPR uint64_t iemNativeEflagsToLivenessMask(void)
3331{
3332 return (a_fEfl & ~X86_EFL_STATUS_BITS ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER) : 0)
3333 | (a_fEfl & X86_EFL_CF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF) : 0)
3334 | (a_fEfl & X86_EFL_PF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_PF) : 0)
3335 | (a_fEfl & X86_EFL_AF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_AF) : 0)
3336 | (a_fEfl & X86_EFL_ZF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_ZF) : 0)
3337 | (a_fEfl & X86_EFL_SF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_SF) : 0)
3338 | (a_fEfl & X86_EFL_OF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OF) : 0);
3339}
3340
3341
3342/**
3343 * Helper function to convert a single X86_EFL_xxxx value to bit number.
3344 *
3345 * The compiler should be able to figure this out at compile time, so sprinkling
3346 * constexpr where ever possible here to nudge it along.
3347 */
3348template<uint32_t const a_fEfl>
3349RT_CONSTEXPR unsigned iemNativeEflagsToSingleBitNo(void)
3350{
3351 AssertCompile( a_fEfl == X86_EFL_CF
3352 || a_fEfl == X86_EFL_PF
3353 || a_fEfl == X86_EFL_AF
3354 || a_fEfl == X86_EFL_ZF
3355 || a_fEfl == X86_EFL_SF
3356 || a_fEfl == X86_EFL_OF
3357 || a_fEfl == X86_EFL_DF);
3358 return a_fEfl == X86_EFL_CF ? X86_EFL_CF_BIT
3359 : a_fEfl == X86_EFL_PF ? X86_EFL_PF_BIT
3360 : a_fEfl == X86_EFL_AF ? X86_EFL_AF_BIT
3361 : a_fEfl == X86_EFL_ZF ? X86_EFL_ZF_BIT
3362 : a_fEfl == X86_EFL_SF ? X86_EFL_SF_BIT
3363 : a_fEfl == X86_EFL_OF ? X86_EFL_OF_BIT
3364 : X86_EFL_DF_BIT;
3365}
3366
3367
3368#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
3369 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits), iemNativeEflagsToLivenessMask<a_fBits>()); \
3370 do {
3371
3372/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
3373DECL_INLINE_THROW(uint32_t)
3374iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl, uint64_t fLivenessEflBits)
3375{
3376 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fBitsInEfl);
3377 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3378 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3379
3380 /* Get the eflags. */
3381 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3382
3383 /* Test and jump. */
3384 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3385
3386 /* Free but don't flush the EFlags register. */
3387 iemNativeRegFreeTmp(pReNative, idxEflReg);
3388
3389 /* Make a copy of the core state now as we start the if-block. */
3390 iemNativeCondStartIfBlock(pReNative, off);
3391
3392 return off;
3393}
3394
3395
3396#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
3397 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits), iemNativeEflagsToLivenessMask<a_fBits>()); \
3398 do {
3399
3400/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
3401DECL_INLINE_THROW(uint32_t)
3402iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl, uint64_t fLivenessEflBits)
3403{
3404 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fBitsInEfl);
3405 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3406 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3407
3408 /* Get the eflags. */
3409 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3410
3411 /* Test and jump. */
3412 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3413
3414 /* Free but don't flush the EFlags register. */
3415 iemNativeRegFreeTmp(pReNative, idxEflReg);
3416
3417 /* Make a copy of the core state now as we start the if-block. */
3418 iemNativeCondStartIfBlock(pReNative, off);
3419
3420 return off;
3421}
3422
3423
3424#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
3425 off = iemNativeEmitIfEflagsBitSet(pReNative, off, iemNativeEflagsToSingleBitNo<a_fBit>(), \
3426 iemNativeEflagsToLivenessMask<a_fBit>()); \
3427 do {
3428
3429/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
3430DECL_INLINE_THROW(uint32_t)
3431iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, unsigned iBitNo, uint64_t fLivenessEflBit)
3432{
3433 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3434 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3435 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3436
3437 /* Get the eflags. */
3438 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3439
3440 /* Test and jump. */
3441 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3442
3443 /* Free but don't flush the EFlags register. */
3444 iemNativeRegFreeTmp(pReNative, idxEflReg);
3445
3446 /* Make a copy of the core state now as we start the if-block. */
3447 iemNativeCondStartIfBlock(pReNative, off);
3448
3449 return off;
3450}
3451
3452
3453#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
3454 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, iemNativeEflagsToSingleBitNo<a_fBit>(), \
3455 iemNativeEflagsToLivenessMask<a_fBit>()); \
3456 do {
3457
3458/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
3459DECL_INLINE_THROW(uint32_t)
3460iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, unsigned iBitNo, uint64_t fLivenessEflBit)
3461{
3462 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3463 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3464 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3465
3466 /* Get the eflags. */
3467 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3468
3469 /* Test and jump. */
3470 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3471
3472 /* Free but don't flush the EFlags register. */
3473 iemNativeRegFreeTmp(pReNative, idxEflReg);
3474
3475 /* Make a copy of the core state now as we start the if-block. */
3476 iemNativeCondStartIfBlock(pReNative, off);
3477
3478 return off;
3479}
3480
3481
/* Both macros below open a conditional via the same emitter; they differ only
   in the fInverted argument (false for _EQ, true for _NE). */
3482#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
3483 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, false /*fInverted*/, \
3484 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3485 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3486 iemNativeEflagsToLivenessMask<a_fBit1 | a_fBit2>()); \
3487 do {
3488
3489#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
3490 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, true /*fInverted*/, \
3491 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3492 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3493 iemNativeEflagsToLivenessMask<a_fBit1 | a_fBit2>()); \
3494 do {
3495
3496/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE.
 *
 * The emitted code isolates EFLAGS bit @a iBitNo1 in a temporary register,
 * shifts it to the position of @a iBitNo2 and XORs the result with EFLAGS.
 * Bit @a iBitNo2 of the temporary is then set exactly when the two flag bits
 * differ, and that single bit decides whether to branch to the else-label.
 *
 * @returns New code buffer offset.
 * @param   pReNative           The recompiler state.
 * @param   off                 Current code buffer offset.
 * @param   fInverted           false: the if-block is entered when the bits are
 *                              equal (_EQ); true: when they differ (_NE).
 * @param   iBitNo1             First EFLAGS bit number.
 * @param   iBitNo2             Second EFLAGS bit number, must differ from
 *                              @a iBitNo1 (asserted).
 * @param   fLivenessEflBits    Liveness mask passed on to the EFLAGS register
 *                              allocator.
 */
3497DECL_INLINE_THROW(uint32_t)
3498iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3499 bool fInverted, unsigned iBitNo1, unsigned iBitNo2, uint64_t fLivenessEflBits)
3500{
3501 Assert(iBitNo1 != iBitNo2);
3502 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3503 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3504 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3505
3506 /* Get the eflags. */
3507 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3508
3509#ifdef RT_ARCH_AMD64
 /* AMD64: the temporary starts out holding the mask for bit #1, so the AND
    below leaves just that EFLAGS bit in it. */
3510 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, RT_BIT_64(iBitNo1));
3511
3512 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
 /* Move the isolated bit #1 to the position of bit #2 ... */
3513 if (iBitNo1 > iBitNo2)
3514 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3515 else
3516 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
 /* ... and XOR with EFLAGS so bit #2 of the temporary becomes (bit1 ^ bit2). */
3517 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3518
3519#elif defined(RT_ARCH_ARM64)
 /* ARM64: same computation in two instructions (AND immediate, then EOR
    with a shifted register operand), hence the buffer request for 2. */
3520 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3521 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3522
3523 /* and tmpreg, eflreg, #1<<iBitNo1 */
 /* NOTE(review): for iBitNo1 == 0 the rotation operand (32 - iBitNo1)
    evaluates to 32; confirm the encoder accepts that for a 32-bit operand
    or that bit 0 (CF) is never passed as the first bit. */
3524 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3525
3526 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3527 if (iBitNo1 > iBitNo2)
3528 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3529 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3530 else
3531 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3532 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3533
3534 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3535
3536#else
3537# error "Port me"
3538#endif
3539
3540 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
 /* For _EQ (fInverted == false) we leave for the else-label when the bit is
    set, i.e. when the flags differ; for _NE it is the other way around. */
3541 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3542 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3543
3544 /* Free but don't flush the EFlags and tmp registers. */
3545 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3546 iemNativeRegFreeTmp(pReNative, idxEflReg);
3547
3548 /* Make a copy of the core state now as we start the if-block. */
3549 iemNativeCondStartIfBlock(pReNative, off);
3550
3551 return off;
3552}
3553
3554
/* Both macros below open a conditional via the same emitter; they differ only
   in the fInverted argument (false for the AND/EQ form, true for the OR/NE
   form). */
3555#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
3556 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, false /*fInverted*/, \
3557 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3558 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3559 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3560 iemNativeEflagsToLivenessMask<a_fBit | a_fBit1 | a_fBit2>()); \
3561 do {
3562
3563#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
3564 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, true /*fInverted*/, \
3565 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3566 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3567 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3568 iemNativeEflagsToLivenessMask<a_fBit | a_fBit1 | a_fBit2>()); \
3569 do {
3570
3571/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
3572 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE.
 *
 * The lone bit @a iBitNo is tested first.  When it is set, the non-inverted
 * variant branches to the else-label, while the inverted variant branches
 * straight to a dedicated if-label.  Only when the lone bit is clear are
 * bits @a iBitNo1 and @a iBitNo2 compared (isolate bit #1, shift it to the
 * position of bit #2, XOR with EFLAGS) and the else-branch decided on the
 * resulting bit.
 *
 * @returns New code buffer offset.
 * @param   pReNative           The recompiler state.
 * @param   off                 Current code buffer offset.
 * @param   fInverted           false: if-block when the lone bit is clear and
 *                              the two bits are equal; true: if-block when the
 *                              lone bit is set or the two bits differ.
 * @param   iBitNo              The lone EFLAGS bit number.
 * @param   iBitNo1             First EFLAGS bit number of the compared pair.
 * @param   iBitNo2             Second EFLAGS bit number of the compared pair.
 * @param   fLivenessEflBits    Liveness mask passed on to the EFLAGS register
 *                              allocator.
 */
3573DECL_INLINE_THROW(uint32_t)
3574iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fInverted,
3575 unsigned iBitNo, unsigned iBitNo1, unsigned iBitNo2, uint64_t fLivenessEflBits)
3576{
 /* All three bit numbers must be distinct. */
3577 Assert(iBitNo1 != iBitNo);
3578 Assert(iBitNo2 != iBitNo);
3579 Assert(iBitNo2 != iBitNo1);
3580 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo) | RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3581 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo) | RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3582 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3583
3584 /* We need an if-block label for the inverted variant, where a set lone bit jumps directly to the if-block. */
3585 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
3586 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
3587
3588 /* Get the eflags. */
3589 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3590
 /* The temporary register is allocated up front, ahead of the first branch
    emitted below.  On AMD64 it is preloaded with the mask for bit #1. */
3591#ifdef RT_ARCH_AMD64
3592 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, RT_BIT_64(iBitNo1)); /* This must come before we jump anywhere! */
3593#elif defined(RT_ARCH_ARM64)
3594 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3595#endif
3596
3597 /* Check for the lone bit first. */
3598 if (!fInverted)
3599 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3600 else
3601 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
3602
3603 /* Then extract and compare the other two bits. */
3604#ifdef RT_ARCH_AMD64
 /* tmp = (efl & bit1-mask), shifted to bit #2's position, then XORed with
    EFLAGS: bit #2 of tmp ends up as (bit1 ^ bit2). */
3605 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3606 if (iBitNo1 > iBitNo2)
3607 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3608 else
3609 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3610 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3611
3612#elif defined(RT_ARCH_ARM64)
3613 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3614
3615 /* and tmpreg, eflreg, #1<<iBitNo1 */
 /* NOTE(review): for iBitNo1 == 0 the rotation operand (32 - iBitNo1)
    evaluates to 32; confirm the encoder accepts that for a 32-bit operand
    or that bit 0 (CF) is never passed as the first bit of the pair. */
3616 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3617
3618 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3619 if (iBitNo1 > iBitNo2)
3620 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3621 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3622 else
3623 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3624 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3625
3626 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3627
3628#else
3629# error "Port me"
3630#endif
3631
3632 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
 /* Non-inverted: go to the else-label when the pair differs.  Inverted: go
    to the else-label when the pair is equal. */
3633 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3634 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3635
3636 /* Free but don't flush the EFlags and tmp registers. */
3637 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3638 iemNativeRegFreeTmp(pReNative, idxEflReg);
3639
3640 /* Make a copy of the core state now as we start the if-block. */
 /* idxLabelIf is UINT32_MAX for the non-inverted variant. */
3641 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
3642
3643 return off;
3644}
3645
3646
3647#define IEM_MC_IF_CX_IS_NZ() \
3648 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
3649 do {
3650
3651/** Emits code for IEM_MC_IF_CX_IS_NZ. */
3652DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3653{
3654 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3655
3656 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3657 kIemNativeGstRegUse_ReadOnly);
3658 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
3659 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3660
3661 iemNativeCondStartIfBlock(pReNative, off);
3662 return off;
3663}
3664
3665
3666#define IEM_MC_IF_ECX_IS_NZ() \
3667 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
3668 do {
3669
3670#define IEM_MC_IF_RCX_IS_NZ() \
3671 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
3672 do {
3673
3674/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
3675DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3676{
3677 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3678
3679 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3680 kIemNativeGstRegUse_ReadOnly);
3681 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
3682 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3683
3684 iemNativeCondStartIfBlock(pReNative, off);
3685 return off;
3686}
3687
3688
3689#define IEM_MC_IF_CX_IS_NOT_ONE() \
3690 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
3691 do {
3692
3693/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
3694DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3695{
3696 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3697
3698 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3699 kIemNativeGstRegUse_ReadOnly);
3700#ifdef RT_ARCH_AMD64
3701 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3702#else
3703 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3704 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3705 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3706#endif
3707 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3708
3709 iemNativeCondStartIfBlock(pReNative, off);
3710 return off;
3711}
3712
3713
3714#define IEM_MC_IF_ECX_IS_NOT_ONE() \
3715 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
3716 do {
3717
3718#define IEM_MC_IF_RCX_IS_NOT_ONE() \
3719 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
3720 do {
3721
3722/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
3723DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3724{
3725 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3726
3727 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3728 kIemNativeGstRegUse_ReadOnly);
3729 if (f64Bit)
3730 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3731 else
3732 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3733 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3734
3735 iemNativeCondStartIfBlock(pReNative, off);
3736 return off;
3737}
3738
3739
/* Both macros below open a conditional via the same emitter; they differ only
   in the fCheckIfSet argument. */
3740#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3741 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, \
3742 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3743 iemNativeEflagsToLivenessMask<a_fBit>()); \
3744 do {
3745
3746#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3747 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, \
3748 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3749 iemNativeEflagsToLivenessMask<a_fBit>()); \
3750 do {
3751
3752/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3753 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET.
 *
 * Two branches to the else-label are emitted: one taken when the 16-bit CX
 * value equals one, and one taken when the EFLAGS bit does not have the
 * requested state.  The if-block is entered only when neither is taken.
 *
 * @returns New code buffer offset.
 * @param   pReNative           The recompiler state.
 * @param   off                 Current code buffer offset.
 * @param   fCheckIfSet         true: the if-block requires the EFLAGS bit to
 *                              be set; false: requires it to be clear.
 * @param   iBitNo              The EFLAGS bit number to test.
 * @param   fLivenessEflBit     Liveness mask passed on to the EFLAGS register
 *                              allocator.
 */
3754DECL_INLINE_THROW(uint32_t)
3755iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3756 bool fCheckIfSet, unsigned iBitNo, uint64_t fLivenessEflBit)
3757{
3758 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3759 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3760 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3761
3762 /* We have to load both RCX and EFLAGS before we can start branching,
3763 otherwise we'll end up in the else-block with an inconsistent
3764 register allocator state.
3765 Doing EFLAGS first as it's more likely to be loaded, right? */
3766 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3767 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3768 kIemNativeGstRegUse_ReadOnly);
3769
3770 /** @todo we could reduce this to a single branch instruction by spending a
3771 * temporary register and some setnz stuff. Not sure if loops are
3772 * worth it. */
3773 /* Check CX. */
3774#ifdef RT_ARCH_AMD64
3775 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3776#else
 /* The non-AMD64 flavour of the 16-bit compare takes a scratch register. */
3777 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3778 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3779 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3780#endif
3781
3782 /* Check the EFlags bit. */
 /* fJmpIfSet is the inverse of fCheckIfSet: we leave for the else-label
    when the bit is in the state the if-block does not want. */
3783 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3784 !fCheckIfSet /*fJmpIfSet*/);
3785
3786 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3787 iemNativeRegFreeTmp(pReNative, idxEflReg);
3788
3789 iemNativeCondStartIfBlock(pReNative, off);
3790 return off;
3791}
3792
3793
/* The four macros below open a conditional via the same emitter; they differ
   only in the fCheckIfSet and f64Bit arguments. */
3794#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3795 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, false /*f64Bit*/, \
3796 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3797 iemNativeEflagsToLivenessMask<a_fBit>()); \
3798 do {
3799
3800#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3801 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, false /*f64Bit*/, \
3802 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3803 iemNativeEflagsToLivenessMask<a_fBit>()); \
3804 do {
3805
3806#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3807 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, true /*f64Bit*/, \
3808 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3809 iemNativeEflagsToLivenessMask<a_fBit>()); \
3810 do {
3811
3812#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3813 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, true /*f64Bit*/, \
3814 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3815 iemNativeEflagsToLivenessMask<a_fBit>()); \
3816 do {
3817
3818/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3819 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3820 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3821 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET.
 *
 * Two branches to the else-label are emitted: one taken when RCX/ECX equals
 * one, and one taken when the EFLAGS bit does not have the requested state.
 * The if-block is entered only when neither is taken.
 *
 * @returns New code buffer offset.
 * @param   pReNative           The recompiler state.
 * @param   off                 Current code buffer offset.
 * @param   fCheckIfSet         true: the if-block requires the EFLAGS bit to
 *                              be set; false: requires it to be clear.
 * @param   f64Bit              true to compare RCX, false to compare ECX.
 * @param   iBitNo              The EFLAGS bit number to test.
 * @param   fLivenessEFlBit     Liveness mask passed on to the EFLAGS register
 *                              allocator.
 */
3822DECL_INLINE_THROW(uint32_t)
3823iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fCheckIfSet, bool f64Bit,
3824 unsigned iBitNo, uint64_t fLivenessEFlBit)
3825
3826{
3827 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3828 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3829 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3830
3831 /* We have to load both RCX and EFLAGS before we can start branching,
3832 otherwise we'll end up in the else-block with an inconsistent
3833 register allocator state.
3834 Doing EFLAGS first as it's more likely to be loaded, right? */
3835 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEFlBit);
3836 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3837 kIemNativeGstRegUse_ReadOnly);
3838
3839 /** @todo we could reduce this to a single branch instruction by spending a
3840 * temporary register and some setnz stuff. Not sure if loops are
3841 * worth it. */
3842 /* Check RCX/ECX. */
3843 if (f64Bit)
3844 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3845 else
3846 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3847
3848 /* Check the EFlags bit. */
 /* fJmpIfSet is the inverse of fCheckIfSet: we leave for the else-label
    when the bit is in the state the if-block does not want. */
3849 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3850 !fCheckIfSet /*fJmpIfSet*/);
3851
3852 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3853 iemNativeRegFreeTmp(pReNative, idxEflReg);
3854
3855 iemNativeCondStartIfBlock(pReNative, off);
3856 return off;
3857}
3858
3859
3860#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3861 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3862 do {
3863
3864/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3865DECL_INLINE_THROW(uint32_t)
3866iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3867{
3868 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3869
3870 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3871 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3872 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3873 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3874
3875 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3876
3877 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3878
3879 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3880
3881 iemNativeCondStartIfBlock(pReNative, off);
3882 return off;
3883}
3884
3885
3886#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3887 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3888 do {
3889
3890/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3891DECL_INLINE_THROW(uint32_t)
3892iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3893{
3894 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3895 Assert(iGReg < 16);
3896
3897 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3898 kIemNativeGstRegUse_ReadOnly);
3899
3900 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3901
3902 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3903
3904 iemNativeCondStartIfBlock(pReNative, off);
3905 return off;
3906}
3907
3908
3909
3910/*********************************************************************************************************************************
3911* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3912*********************************************************************************************************************************/
3913
/* Marks a_Name as intentionally unreferenced (forwards to RT_NOREF_PV). */
3914#define IEM_MC_NOREF(a_Name) \
3915 RT_NOREF_PV(a_Name)
3916
/* The macros below each declare a 'uint8_t const' named a_Name holding the
   value returned by the respective allocator.  a_Type is only used for its
   size (and not at all by IEM_MC_ARG_LOCAL_REF). */

/* Argument number a_iArg, sized after a_Type. */
3917#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3918 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3919
/* Argument number a_iArg with the constant value a_Value. */
3920#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3921 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3922
/* Argument number a_iArg referencing the local variable a_Local. */
3923#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3924 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3925
/* Local variable sized after a_Type. */
3926#define IEM_MC_LOCAL(a_Type, a_Name) \
3927 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3928
/* Local variable with the constant value a_Value. */
3929#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3930 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3931
/* Local variable assigned from a_Value; the only one of these that passes
   &off to its allocator. */
3932#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3933 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3934
3935
3936/**
3937 * Sets the host register for @a idxVar to @a idxReg.
3938 *
3939 * Any guest register shadowing will be implicitly dropped by this call.
3940 *
3941 * The variable must not have any register associated with it (causes
3942 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3943 * implied.
 *
 * VERR_IEM_VAR_IPE_11 is raised when the current allocation state of
 * @a idxReg does not match @a fAllocated.
3944 *
3945 * @returns idxReg
3946 * @param pReNative The recompiler state.
3947 * @param idxVar The variable.
3948 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3949 * @param off For recording in debug info.
3950 * @param fAllocated Set if the register is already allocated, false if not.
3951 *
3952 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3953 */
3954DECL_INLINE_THROW(uint8_t)
3955iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off, bool fAllocated)
3956{
3957 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3958 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3959 Assert(!pVar->fRegAcquired);
3960 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
 /* The variable must not already own a host register. */
3961 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
 /* The host register allocation bitmap must agree with what the caller claims. */
3962 AssertStmt(RT_BOOL(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)) == fAllocated,
3963 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3964
 /* Drop guest shadowing, then (re-)mark the register as owned by the variable. */
3965 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3966 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3967
 /* Switch the variable to the stack kind before recording the register. */
3968 iemNativeVarSetKindToStack(pReNative, idxVar);
3969 pVar->idxReg = idxReg;
3970
3971 return idxReg;
3972}
3973
3974
3975/**
3976 * A convenient helper function.
3977 */
3978DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3979 uint8_t idxReg, uint32_t *poff)
3980{
3981 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff, false /*fAllocated*/);
3982 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3983 return idxReg;
3984}
3985
3986
3987/**
3988 * This is called by IEM_MC_END() to clean up all variables.
3989 */
3990DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3991{
3992 uint32_t const bmVars = pReNative->Core.bmVars;
3993 if (bmVars != 0)
3994 iemNativeVarFreeAllSlow(pReNative, bmVars);
3995 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3996 Assert(pReNative->Core.bmStack == 0);
3997}
3998
3999
4000#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
4001
4002/**
4003 * This is called by IEM_MC_FREE_LOCAL.
 *
 * Frees a single local variable.  The variable is asserted not to be an
 * argument (uArgNo must be UINT8_MAX) before it is handed to the common
 * free worker.
 *
 * @param pReNative The recompiler state.
 * @param idxVar The (packed) index of the local variable to free.
4004 */
4005DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4006{
4007 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4008 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
 /* The worker takes the unpacked index. */
4009 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
4010}
4011
4012
4013#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
4014
4015/**
4016 * This is called by IEM_MC_FREE_ARG.
 *
 * Frees a single argument variable.  The variable is asserted to carry a
 * valid argument number (below the size of the aidxArgVars table) before it
 * is handed to the common free worker.
 *
 * @param pReNative The recompiler state.
 * @param idxVar The (packed) index of the argument variable to free.
4017 */
4018DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4019{
4020 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4021 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
 /* The worker takes the unpacked index. */
4022 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
4023}
4024
4025
4026#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
4027
4028/**
4029 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
4030 */
4031DECL_INLINE_THROW(uint32_t)
4032iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
4033{
4034 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
4035 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
4036 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4037 Assert( pVarDst->cbVar == sizeof(uint16_t)
4038 || pVarDst->cbVar == sizeof(uint32_t));
4039
4040 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
4041 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
4042 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
4043 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
4044 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4045
4046 Assert(pVarDst->cbVar < pVarSrc->cbVar);
4047
4048 /*
4049 * Special case for immediates.
4050 */
4051 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
4052 {
4053 switch (pVarDst->cbVar)
4054 {
4055 case sizeof(uint16_t):
4056 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
4057 break;
4058 case sizeof(uint32_t):
4059 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
4060 break;
4061 default: AssertFailed(); break;
4062 }
4063 }
4064 else
4065 {
4066 /*
4067 * The generic solution for now.
4068 */
4069 /** @todo optimize this by having the python script make sure the source
4070 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
4071 * statement. Then we could just transfer the register assignments. */
4072 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
4073 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
4074 switch (pVarDst->cbVar)
4075 {
4076 case sizeof(uint16_t):
4077 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
4078 break;
4079 case sizeof(uint32_t):
4080 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
4081 break;
4082 default: AssertFailed(); break;
4083 }
4084 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
4085 iemNativeVarRegisterRelease(pReNative, idxVarDst);
4086 }
4087 return off;
4088}
4089
4090
4091
4092/*********************************************************************************************************************************
4093* Emitters for IEM_MC_CALL_CIMPL_XXX *
4094*********************************************************************************************************************************/
4095
4096/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
4097DECL_INLINE_THROW(uint32_t)
4098iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
4099 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
4100
4101{
4102 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
4103 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
4104
4105 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
4106 when a calls clobber any of the relevant control registers. */
4107#if 1
4108 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
4109 {
4110 /* Likely as long as call+ret are done via cimpl. */
4111 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
4112 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
4113 }
4114 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
4115 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4116 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
4117 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4118 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
4119 else
4120 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4121 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4122 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4123
4124#else
4125 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
4126 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4127 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
4128 pReNative->fSimdRaiseXcptChecksEmitted = 0;
4129 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
4130 || pfnCImpl == (uintptr_t)iemCImpl_callf
4131 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
4132 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
4133 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4134 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4135 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4136#endif
4137
4138#ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
4139 /* Mark the host floating point control register as not synced if MXCSR is modified. */
4140 if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_MxCsr))
4141 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
4142#endif
4143
4144 /*
4145 * Do all the call setup and cleanup.
4146 */
4147 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
4148
4149 /*
4150 * Load the two or three hidden arguments.
4151 */
4152#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64))
4153 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict */
4154 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4155 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
4156#else
4157 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4158 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
4159#endif
4160
4161 /*
4162 * Make the call and check the return code.
4163 *
4164 * Shadow PC copies are always flushed here, other stuff depends on flags.
4165 * Segment and general purpose registers are explictily flushed via the
4166 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
4167 * macros.
4168 */
4169 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
4170#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64))
4171 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict (see above) */
4172#endif
4173 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
4174 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
4175 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4176 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4177
4178#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
4179 pReNative->Core.fDebugPcInitialized = false;
4180 Log4(("fDebugPcInitialized=false cimpl off=%#x (v1)\n", off));
4181#endif
4182
4183 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4184}
4185
4186
4187#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4188 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
4189
4190/** Emits code for IEM_MC_CALL_CIMPL_1. */
4191DECL_INLINE_THROW(uint32_t)
4192iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4193 uintptr_t pfnCImpl, uint8_t idxArg0)
4194{
4195 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4196 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
4197}
4198
4199
4200#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4201 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
4202
4203/** Emits code for IEM_MC_CALL_CIMPL_2. */
4204DECL_INLINE_THROW(uint32_t)
4205iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4206 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
4207{
4208 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4209 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4210 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
4211}
4212
4213
4214#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4215 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4216 (uintptr_t)a_pfnCImpl, a0, a1, a2)
4217
4218/** Emits code for IEM_MC_CALL_CIMPL_3. */
4219DECL_INLINE_THROW(uint32_t)
4220iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4221 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4222{
4223 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4224 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4225 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4226 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
4227}
4228
4229
4230#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
4231 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4232 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
4233
4234/** Emits code for IEM_MC_CALL_CIMPL_4. */
4235DECL_INLINE_THROW(uint32_t)
4236iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4237 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4238{
4239 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4240 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4241 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4242 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4243 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
4244}
4245
4246
4247#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
4248 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4249 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
4250
4251/** Emits code for IEM_MC_CALL_CIMPL_4. */
4252DECL_INLINE_THROW(uint32_t)
4253iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4254 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
4255{
4256 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4257 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4258 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4259 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4260 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
4261 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
4262}
4263
4264
/**
 * Recompiler debugging: Flush guest register shadow copies.
 *
 * Expands to an iemNativeRegFlushGuestShadows() call, so @c pReNative must be
 * in scope at the point of use.  @a g_fGstShwFlush is passed on unmodified as
 * the mask of shadow copies to flush.
 */
#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
4267
4268
4269
4270/*********************************************************************************************************************************
4271* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
4272*********************************************************************************************************************************/
4273
4274/**
4275 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
4276 */
4277DECL_INLINE_THROW(uint32_t)
4278iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4279 uintptr_t pfnAImpl, uint8_t cArgs)
4280{
4281 if (idxVarRc != UINT8_MAX)
4282 {
4283 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
4284 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
4285 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
4286 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
4287 }
4288
4289 /*
4290 * Do all the call setup and cleanup.
4291 *
4292 * It is only required to flush pending guest register writes in call volatile registers as
4293 * assembly helpers can't throw and don't access anything living in CPUMCTX, they only
4294 * access parameters. The flushing of call volatile registers is always done in iemNativeEmitCallCommon()
4295 * no matter the fFlushPendingWrites parameter.
4296 */
4297 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
4298
4299 /*
4300 * Make the call and update the return code variable if we've got one.
4301 */
4302 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnAImpl);
4303 if (idxVarRc != UINT8_MAX)
4304 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off, false /*fAllocated*/);
4305
4306 return off;
4307}
4308
4309
4310
4311#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
4312 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
4313
4314#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
4315 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
4316
4317/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
4318DECL_INLINE_THROW(uint32_t)
4319iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
4320{
4321 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
4322}
4323
4324
4325#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
4326 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
4327
4328#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
4329 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
4330
4331/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
4332DECL_INLINE_THROW(uint32_t)
4333iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
4334{
4335 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4336 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
4337}
4338
4339
4340#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
4341 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
4342
4343#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
4344 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
4345
4346/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
4347DECL_INLINE_THROW(uint32_t)
4348iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4349 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
4350{
4351 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4352 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4353 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
4354}
4355
4356
4357#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
4358 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
4359
4360#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
4361 IEM_MC_LOCAL(a_rcType, a_rc); \
4362 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
4363
4364/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
4365DECL_INLINE_THROW(uint32_t)
4366iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4367 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4368{
4369 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4370 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4371 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4372 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
4373}
4374
4375
4376#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
4377 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4378
4379#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
4380 IEM_MC_LOCAL(a_rcType, a_rc); \
4381 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4382
4383/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
4384DECL_INLINE_THROW(uint32_t)
4385iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4386 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4387{
4388 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4389 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4390 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4391 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
4392 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
4393}
4394
4395
4396
4397/*********************************************************************************************************************************
4398* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
4399*********************************************************************************************************************************/
4400
4401#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
4402 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
4403
4404#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4405 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
4406
4407#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4408 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
4409
4410#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4411 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
4412
4413
4414/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
4415 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
4416DECL_INLINE_THROW(uint32_t)
4417iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
4418{
4419 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4420 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4421 Assert(iGRegEx < 20);
4422
4423 /* Same discussion as in iemNativeEmitFetchGregU16 */
4424 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4425 kIemNativeGstRegUse_ReadOnly);
4426
4427 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4428 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4429
4430 /* The value is zero-extended to the full 64-bit host register width. */
4431 if (iGRegEx < 16)
4432 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4433 else
4434 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4435
4436 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4437 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4438 return off;
4439}
4440
4441
4442#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4443 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
4444
4445#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4446 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
4447
4448#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4449 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
4450
4451/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
4452DECL_INLINE_THROW(uint32_t)
4453iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
4454{
4455 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4456 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4457 Assert(iGRegEx < 20);
4458
4459 /* Same discussion as in iemNativeEmitFetchGregU16 */
4460 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4461 kIemNativeGstRegUse_ReadOnly);
4462
4463 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4464 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4465
4466 if (iGRegEx < 16)
4467 {
4468 switch (cbSignExtended)
4469 {
4470 case sizeof(uint16_t):
4471 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4472 break;
4473 case sizeof(uint32_t):
4474 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4475 break;
4476 case sizeof(uint64_t):
4477 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4478 break;
4479 default: AssertFailed(); break;
4480 }
4481 }
4482 else
4483 {
4484 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4485 switch (cbSignExtended)
4486 {
4487 case sizeof(uint16_t):
4488 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4489 break;
4490 case sizeof(uint32_t):
4491 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4492 break;
4493 case sizeof(uint64_t):
4494 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4495 break;
4496 default: AssertFailed(); break;
4497 }
4498 }
4499
4500 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4501 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4502 return off;
4503}
4504
4505
4506
4507#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
4508 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
4509
4510#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
4511 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4512
4513#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
4514 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4515
4516/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
4517DECL_INLINE_THROW(uint32_t)
4518iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4519{
4520 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4521 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4522 Assert(iGReg < 16);
4523
4524 /*
4525 * We can either just load the low 16-bit of the GPR into a host register
4526 * for the variable, or we can do so via a shadow copy host register. The
4527 * latter will avoid having to reload it if it's being stored later, but
4528 * will waste a host register if it isn't touched again. Since we don't
4529 * know what going to happen, we choose the latter for now.
4530 */
4531 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4532 kIemNativeGstRegUse_ReadOnly);
4533
4534 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4535 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4536 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4537 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4538
4539 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4540 return off;
4541}
4542
4543#define IEM_MC_FETCH_GREG_I16(a_i16Dst, a_iGReg) \
4544 off = iemNativeEmitFetchGregI16(pReNative, off, a_i16Dst, a_iGReg)
4545
4546/** Emits code for IEM_MC_FETCH_GREG_I16. */
4547DECL_INLINE_THROW(uint32_t)
4548iemNativeEmitFetchGregI16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4549{
4550 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4551 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(int16_t));
4552 Assert(iGReg < 16);
4553
4554 /*
4555 * We can either just load the low 16-bit of the GPR into a host register
4556 * for the variable, or we can do so via a shadow copy host register. The
4557 * latter will avoid having to reload it if it's being stored later, but
4558 * will waste a host register if it isn't touched again. Since we don't
4559 * know what going to happen, we choose the latter for now.
4560 */
4561 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4562 kIemNativeGstRegUse_ReadOnly);
4563
4564 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4565 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4566#ifdef RT_ARCH_AMD64
4567 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4568#elif defined(RT_ARCH_ARM64) /* Note! There are no 16-bit registers on ARM, we emulate that through 32-bit registers which requires sign extension. */
4569 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4570#endif
4571 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4572
4573 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4574 return off;
4575}
4576
4577
4578#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
4579 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4580
4581#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
4582 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4583
4584/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
4585DECL_INLINE_THROW(uint32_t)
4586iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
4587{
4588 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4589 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4590 Assert(iGReg < 16);
4591
4592 /*
4593 * We can either just load the low 16-bit of the GPR into a host register
4594 * for the variable, or we can do so via a shadow copy host register. The
4595 * latter will avoid having to reload it if it's being stored later, but
4596 * will waste a host register if it isn't touched again. Since we don't
4597 * know what going to happen, we choose the latter for now.
4598 */
4599 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4600 kIemNativeGstRegUse_ReadOnly);
4601
4602 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4603 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4604 if (cbSignExtended == sizeof(uint32_t))
4605 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4606 else
4607 {
4608 Assert(cbSignExtended == sizeof(uint64_t));
4609 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4610 }
4611 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4612
4613 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4614 return off;
4615}
4616
4617
4618#define IEM_MC_FETCH_GREG_I32(a_i32Dst, a_iGReg) \
4619 off = iemNativeEmitFetchGregU32(pReNative, off, a_i32Dst, a_iGReg, sizeof(uint32_t))
4620
4621#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
4622 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
4623
4624#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
4625 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
4626
4627/** Emits code for IEM_MC_FETCH_GREG_U32. */
4628DECL_INLINE_THROW(uint32_t)
4629iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4630{
4631 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4632 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4633 Assert(iGReg < 16);
4634
4635 /*
4636 * We can either just load the low 16-bit of the GPR into a host register
4637 * for the variable, or we can do so via a shadow copy host register. The
4638 * latter will avoid having to reload it if it's being stored later, but
4639 * will waste a host register if it isn't touched again. Since we don't
4640 * know what going to happen, we choose the latter for now.
4641 */
4642 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4643 kIemNativeGstRegUse_ReadOnly);
4644
4645 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4646 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4647 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4648 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4649
4650 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4651 return off;
4652}
4653
4654
4655#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
4656 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
4657
4658/** Emits code for IEM_MC_FETCH_GREG_U32. */
4659DECL_INLINE_THROW(uint32_t)
4660iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4661{
4662 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4663 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4664 Assert(iGReg < 16);
4665
4666 /*
4667 * We can either just load the low 32-bit of the GPR into a host register
4668 * for the variable, or we can do so via a shadow copy host register. The
4669 * latter will avoid having to reload it if it's being stored later, but
4670 * will waste a host register if it isn't touched again. Since we don't
4671 * know what going to happen, we choose the latter for now.
4672 */
4673 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4674 kIemNativeGstRegUse_ReadOnly);
4675
4676 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4677 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4678 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4679 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4680
4681 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4682 return off;
4683}
4684
4685
4686#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
4687 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4688
4689#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
4690 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4691
4692/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
4693 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
4694DECL_INLINE_THROW(uint32_t)
4695iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4696{
4697 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4698 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4699 Assert(iGReg < 16);
4700
4701 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4702 kIemNativeGstRegUse_ReadOnly);
4703
4704 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4705 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4706 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
4707 /** @todo name the register a shadow one already? */
4708 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4709
4710 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4711 return off;
4712}
4713
4714
4715#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
4716 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
4717
4718/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
4719DECL_INLINE_THROW(uint32_t)
4720iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
4721{
4722 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4723 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4724 Assert(iGRegLo < 16 && iGRegHi < 16);
4725
4726 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4727 kIemNativeGstRegUse_ReadOnly);
4728 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4729 kIemNativeGstRegUse_ReadOnly);
4730
4731 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4732 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
4733 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
4734 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
4735
4736 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4737 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4738 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4739 return off;
4740}
4741
4742
4743/*********************************************************************************************************************************
4744* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
4745*********************************************************************************************************************************/
4746
4747#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
4748 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
4749
4750/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
4751DECL_INLINE_THROW(uint32_t)
4752iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
4753{
4754 Assert(iGRegEx < 20);
4755 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4756 kIemNativeGstRegUse_ForUpdate);
4757#ifdef RT_ARCH_AMD64
4758 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4759
4760 /* To the lowest byte of the register: mov r8, imm8 */
4761 if (iGRegEx < 16)
4762 {
4763 if (idxGstTmpReg >= 8)
4764 pbCodeBuf[off++] = X86_OP_REX_B;
4765 else if (idxGstTmpReg >= 4)
4766 pbCodeBuf[off++] = X86_OP_REX;
4767 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4768 pbCodeBuf[off++] = u8Value;
4769 }
4770 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
4771 else if (idxGstTmpReg < 4)
4772 {
4773 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4774 pbCodeBuf[off++] = u8Value;
4775 }
4776 else
4777 {
4778 /* ror reg64, 8 */
4779 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4780 pbCodeBuf[off++] = 0xc1;
4781 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4782 pbCodeBuf[off++] = 8;
4783
4784 /* mov reg8, imm8 */
4785 if (idxGstTmpReg >= 8)
4786 pbCodeBuf[off++] = X86_OP_REX_B;
4787 else if (idxGstTmpReg >= 4)
4788 pbCodeBuf[off++] = X86_OP_REX;
4789 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4790 pbCodeBuf[off++] = u8Value;
4791
4792 /* rol reg64, 8 */
4793 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4794 pbCodeBuf[off++] = 0xc1;
4795 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4796 pbCodeBuf[off++] = 8;
4797 }
4798
4799#elif defined(RT_ARCH_ARM64)
4800 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4801 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4802 if (iGRegEx < 16)
4803 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4804 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4805 else
4806 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4807 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4808 iemNativeRegFreeTmp(pReNative, idxImmReg);
4809
4810#else
4811# error "Port me!"
4812#endif
4813
4814 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4815
4816#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4817 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4818#endif
4819
4820 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4821 return off;
4822}
4823
4824
4825#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4826 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4827
4828/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4829DECL_INLINE_THROW(uint32_t)
4830iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4831{
4832 Assert(iGRegEx < 20);
4833 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4834
4835 /*
4836 * If it's a constant value (unlikely) we treat this as a
4837 * IEM_MC_STORE_GREG_U8_CONST statement.
4838 */
4839 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4840 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4841 { /* likely */ }
4842 else
4843 {
4844 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4845 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4846 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4847 }
4848
4849 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4850 kIemNativeGstRegUse_ForUpdate);
4851 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxValueVar, &off);
4852
4853#ifdef RT_ARCH_AMD64
4854 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4855 if (iGRegEx < 16)
4856 {
4857 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4858 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4859 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4860 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4861 pbCodeBuf[off++] = X86_OP_REX;
4862 pbCodeBuf[off++] = 0x8a;
4863 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4864 }
4865 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
4866 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4867 {
4868 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4869 pbCodeBuf[off++] = 0x8a;
4870 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4871 }
4872 else
4873 {
4874 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4875
4876 /* ror reg64, 8 */
4877 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4878 pbCodeBuf[off++] = 0xc1;
4879 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4880 pbCodeBuf[off++] = 8;
4881
4882 /* mov reg8, reg8(r/m) */
4883 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4884 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4885 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4886 pbCodeBuf[off++] = X86_OP_REX;
4887 pbCodeBuf[off++] = 0x8a;
4888 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4889
4890 /* rol reg64, 8 */
4891 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4892 pbCodeBuf[off++] = 0xc1;
4893 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4894 pbCodeBuf[off++] = 8;
4895 }
4896
4897#elif defined(RT_ARCH_ARM64)
4898 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4899 or
4900 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4901 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4902 if (iGRegEx < 16)
4903 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4904 else
4905 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4906
4907#else
4908# error "Port me!"
4909#endif
4910 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4911
4912 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4913
4914#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4915 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4916#endif
4917 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4918 return off;
4919}
4920
4921
4922
4923#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4924 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4925
4926/** Emits code for IEM_MC_STORE_GREG_U16. */
4927DECL_INLINE_THROW(uint32_t)
4928iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4929{
4930 Assert(iGReg < 16);
4931 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4932 kIemNativeGstRegUse_ForUpdate);
4933#ifdef RT_ARCH_AMD64
4934 /* mov reg16, imm16 */
4935 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4936 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4937 if (idxGstTmpReg >= 8)
4938 pbCodeBuf[off++] = X86_OP_REX_B;
4939 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4940 pbCodeBuf[off++] = RT_BYTE1(uValue);
4941 pbCodeBuf[off++] = RT_BYTE2(uValue);
4942
4943#elif defined(RT_ARCH_ARM64)
4944 /* movk xdst, #uValue, lsl #0 */
4945 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4946 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4947
4948#else
4949# error "Port me!"
4950#endif
4951
4952 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4953
4954#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4955 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4956#endif
4957 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4958 return off;
4959}
4960
4961
4962#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4963 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4964
4965/** Emits code for IEM_MC_STORE_GREG_U16. */
4966DECL_INLINE_THROW(uint32_t)
4967iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4968{
4969 Assert(iGReg < 16);
4970 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4971
4972 /*
4973 * If it's a constant value (unlikely) we treat this as a
4974 * IEM_MC_STORE_GREG_U16_CONST statement.
4975 */
4976 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4977 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4978 { /* likely */ }
4979 else
4980 {
4981 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4982 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4983 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4984 }
4985
4986 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4987 kIemNativeGstRegUse_ForUpdate);
4988
4989#ifdef RT_ARCH_AMD64
4990 /* mov reg16, reg16 or [mem16] */
4991 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4992 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4993 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4994 {
4995 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4996 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
4997 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
4998 pbCodeBuf[off++] = 0x8b;
4999 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
5000 }
5001 else
5002 {
5003 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
5004 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
5005 if (idxGstTmpReg >= 8)
5006 pbCodeBuf[off++] = X86_OP_REX_R;
5007 pbCodeBuf[off++] = 0x8b;
5008 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
5009 }
5010
5011#elif defined(RT_ARCH_ARM64)
5012 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
5013 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxValueVar, &off);
5014 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5015 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
5016 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5017
5018#else
5019# error "Port me!"
5020#endif
5021
5022 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5023
5024#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5025 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5026#endif
5027 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5028 return off;
5029}
5030
5031
5032#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
5033 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
5034
5035/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
5036DECL_INLINE_THROW(uint32_t)
5037iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
5038{
5039 Assert(iGReg < 16);
5040 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5041 kIemNativeGstRegUse_ForFullWrite);
5042 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5043#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5044 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5045#endif
5046 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5047 return off;
5048}
5049
5050
5051#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
5052 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
5053
5054#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
5055 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
5056
5057/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
5058DECL_INLINE_THROW(uint32_t)
5059iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5060{
5061 Assert(iGReg < 16);
5062 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5063
5064 /*
5065 * If it's a constant value (unlikely) we treat this as a
5066 * IEM_MC_STORE_GREG_U32_CONST statement.
5067 */
5068 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5069 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5070 { /* likely */ }
5071 else
5072 {
5073 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5074 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5075 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
5076 }
5077
5078 /*
5079 * For the rest we allocate a guest register for the variable and writes
5080 * it to the CPUMCTX structure.
5081 */
5082 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5083#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5084 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5085#else
5086 RT_NOREF(idxVarReg);
5087#endif
5088#ifdef VBOX_STRICT
5089 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
5090#endif
5091 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5092 return off;
5093}
5094
5095
5096#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
5097 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
5098
5099/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
5100DECL_INLINE_THROW(uint32_t)
5101iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
5102{
5103 Assert(iGReg < 16);
5104 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5105 kIemNativeGstRegUse_ForFullWrite);
5106 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5107#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5108 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5109#endif
5110 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5111 return off;
5112}
5113
5114
5115#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
5116 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
5117
5118#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
5119 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
5120
5121/** Emits code for IEM_MC_STORE_GREG_U64. */
5122DECL_INLINE_THROW(uint32_t)
5123iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5124{
5125 Assert(iGReg < 16);
5126 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5127
5128 /*
5129 * If it's a constant value (unlikely) we treat this as a
5130 * IEM_MC_STORE_GREG_U64_CONST statement.
5131 */
5132 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5133 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5134 { /* likely */ }
5135 else
5136 {
5137 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5138 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5139 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
5140 }
5141
5142 /*
5143 * For the rest we allocate a guest register for the variable and writes
5144 * it to the CPUMCTX structure.
5145 */
5146 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5147#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5148 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5149#else
5150 RT_NOREF(idxVarReg);
5151#endif
5152 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5153 return off;
5154}
5155
5156
5157#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
5158 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
5159
5160/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
5161DECL_INLINE_THROW(uint32_t)
5162iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
5163{
5164 Assert(iGReg < 16);
5165 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5166 kIemNativeGstRegUse_ForUpdate);
5167 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
5168#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5169 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5170#endif
5171 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5172 return off;
5173}
5174
5175
5176#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
5177 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
5178
5179/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
5180DECL_INLINE_THROW(uint32_t)
5181iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
5182{
5183 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5184 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
5185 Assert(iGRegLo < 16 && iGRegHi < 16);
5186
5187 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
5188 kIemNativeGstRegUse_ForFullWrite);
5189 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
5190 kIemNativeGstRegUse_ForFullWrite);
5191
5192 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5193 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
5194 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
5195 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
5196
5197 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
5198 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
5199 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
5200 return off;
5201}
5202
5203
5204/*********************************************************************************************************************************
5205* General purpose register manipulation (add, sub). *
5206*********************************************************************************************************************************/
5207
5208#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5209 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5210
5211/** Emits code for IEM_MC_ADD_GREG_U16. */
5212DECL_INLINE_THROW(uint32_t)
5213iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
5214{
5215 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5216 kIemNativeGstRegUse_ForUpdate);
5217
5218#ifdef RT_ARCH_AMD64
5219 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5220 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5221 if (idxGstTmpReg >= 8)
5222 pbCodeBuf[off++] = X86_OP_REX_B;
5223 if (uAddend == 1)
5224 {
5225 pbCodeBuf[off++] = 0xff; /* inc */
5226 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5227 }
5228 else
5229 {
5230 pbCodeBuf[off++] = 0x81;
5231 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5232 pbCodeBuf[off++] = uAddend;
5233 pbCodeBuf[off++] = 0;
5234 }
5235
5236#else
5237 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5238 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5239
5240 /* sub tmp, gstgrp, uAddend */
5241 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
5242
5243 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
5244 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5245
5246 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5247#endif
5248
5249 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5250
5251#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5252 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5253#endif
5254
5255 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5256 return off;
5257}
5258
5259
5260#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
5261 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5262
5263#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
5264 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5265
5266/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
5267DECL_INLINE_THROW(uint32_t)
5268iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
5269{
5270 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5271 kIemNativeGstRegUse_ForUpdate);
5272
5273#ifdef RT_ARCH_AMD64
5274 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5275 if (f64Bit)
5276 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5277 else if (idxGstTmpReg >= 8)
5278 pbCodeBuf[off++] = X86_OP_REX_B;
5279 if (uAddend == 1)
5280 {
5281 pbCodeBuf[off++] = 0xff; /* inc */
5282 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5283 }
5284 else if (uAddend < 128)
5285 {
5286 pbCodeBuf[off++] = 0x83; /* add */
5287 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5288 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5289 }
5290 else
5291 {
5292 pbCodeBuf[off++] = 0x81; /* add */
5293 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5294 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5295 pbCodeBuf[off++] = 0;
5296 pbCodeBuf[off++] = 0;
5297 pbCodeBuf[off++] = 0;
5298 }
5299
5300#else
5301 /* sub tmp, gstgrp, uAddend */
5302 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5303 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
5304
5305#endif
5306
5307 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5308
5309#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5310 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5311#endif
5312
5313 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5314 return off;
5315}
5316
5317
5318
5319#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5320 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5321
5322/** Emits code for IEM_MC_SUB_GREG_U16. */
5323DECL_INLINE_THROW(uint32_t)
5324iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5325{
5326 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5327 kIemNativeGstRegUse_ForUpdate);
5328
5329#ifdef RT_ARCH_AMD64
5330 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5331 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5332 if (idxGstTmpReg >= 8)
5333 pbCodeBuf[off++] = X86_OP_REX_B;
5334 if (uSubtrahend == 1)
5335 {
5336 pbCodeBuf[off++] = 0xff; /* dec */
5337 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5338 }
5339 else
5340 {
5341 pbCodeBuf[off++] = 0x81;
5342 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5343 pbCodeBuf[off++] = uSubtrahend;
5344 pbCodeBuf[off++] = 0;
5345 }
5346
5347#else
5348 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5349 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5350
5351 /* sub tmp, gstgrp, uSubtrahend */
5352 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5353
5354 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
5355 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5356
5357 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5358#endif
5359
5360 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5361
5362#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5363 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5364#endif
5365
5366 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5367 return off;
5368}
5369
5370
5371#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5372 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5373
5374#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5375 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5376
5377/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5378DECL_INLINE_THROW(uint32_t)
5379iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5380{
5381 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5382 kIemNativeGstRegUse_ForUpdate);
5383
5384#ifdef RT_ARCH_AMD64
5385 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5386 if (f64Bit)
5387 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5388 else if (idxGstTmpReg >= 8)
5389 pbCodeBuf[off++] = X86_OP_REX_B;
5390 if (uSubtrahend == 1)
5391 {
5392 pbCodeBuf[off++] = 0xff; /* dec */
5393 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5394 }
5395 else if (uSubtrahend < 128)
5396 {
5397 pbCodeBuf[off++] = 0x83; /* sub */
5398 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5399 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5400 }
5401 else
5402 {
5403 pbCodeBuf[off++] = 0x81; /* sub */
5404 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5405 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5406 pbCodeBuf[off++] = 0;
5407 pbCodeBuf[off++] = 0;
5408 pbCodeBuf[off++] = 0;
5409 }
5410
5411#else
5412 /* sub tmp, gstgrp, uSubtrahend */
5413 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5414 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
5415
5416#endif
5417
5418 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5419
5420#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5421 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5422#endif
5423
5424 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5425 return off;
5426}
5427
5428
5429#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
5430 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5431
5432#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
5433 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5434
5435#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
5436 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5437
5438#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
5439 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5440
5441/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
5442DECL_INLINE_THROW(uint32_t)
5443iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5444{
5445#ifdef VBOX_STRICT
5446 switch (cbMask)
5447 {
5448 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5449 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5450 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5451 case sizeof(uint64_t): break;
5452 default: AssertFailedBreak();
5453 }
5454#endif
5455
5456 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5457 kIemNativeGstRegUse_ForUpdate);
5458
5459 switch (cbMask)
5460 {
5461 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5462 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
5463 break;
5464 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
5465 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
5466 break;
5467 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5468 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5469 break;
5470 case sizeof(uint64_t):
5471 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
5472 break;
5473 default: AssertFailedBreak();
5474 }
5475
5476 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5477
5478#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5479 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5480#endif
5481
5482 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5483 return off;
5484}
5485
5486
5487#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
5488 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5489
5490#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
5491 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5492
5493#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
5494 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5495
5496#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
5497 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5498
5499/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
5500DECL_INLINE_THROW(uint32_t)
5501iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5502{
5503#ifdef VBOX_STRICT
5504 switch (cbMask)
5505 {
5506 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5507 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5508 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5509 case sizeof(uint64_t): break;
5510 default: AssertFailedBreak();
5511 }
5512#endif
5513
5514 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5515 kIemNativeGstRegUse_ForUpdate);
5516
5517 switch (cbMask)
5518 {
5519 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5520 case sizeof(uint16_t):
5521 case sizeof(uint64_t):
5522 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
5523 break;
5524 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5525 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5526 break;
5527 default: AssertFailedBreak();
5528 }
5529
5530 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5531
5532#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5533 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5534#endif
5535
5536 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5537 return off;
5538}
5539
5540
5541/*********************************************************************************************************************************
5542* Local/Argument variable manipulation (add, sub, and, or). *
5543*********************************************************************************************************************************/
5544
5545#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
5546 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5547
5548#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
5549 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5550
5551#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
5552 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5553
5554#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
5555 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5556
5557
5558#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
5559 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
5560
5561#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
5562 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
5563
5564#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
5565 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
5566
5567/** Emits code for AND'ing a local and a constant value. */
5568DECL_INLINE_THROW(uint32_t)
5569iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5570{
5571#ifdef VBOX_STRICT
5572 switch (cbMask)
5573 {
5574 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5575 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5576 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5577 case sizeof(uint64_t): break;
5578 default: AssertFailedBreak();
5579 }
5580#endif
5581
5582 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5583 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5584
5585 if (cbMask <= sizeof(uint32_t))
5586 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
5587 else
5588 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
5589
5590 iemNativeVarRegisterRelease(pReNative, idxVar);
5591 return off;
5592}
5593
5594
5595#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
5596 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5597
5598#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
5599 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5600
5601#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
5602 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5603
5604#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
5605 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5606
5607/** Emits code for OR'ing a local and a constant value. */
5608DECL_INLINE_THROW(uint32_t)
5609iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5610{
5611#ifdef VBOX_STRICT
5612 switch (cbMask)
5613 {
5614 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5615 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5616 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5617 case sizeof(uint64_t): break;
5618 default: AssertFailedBreak();
5619 }
5620#endif
5621
5622 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5623 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5624
5625 if (cbMask <= sizeof(uint32_t))
5626 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
5627 else
5628 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
5629
5630 iemNativeVarRegisterRelease(pReNative, idxVar);
5631 return off;
5632}
5633
5634
5635#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
5636 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
5637
5638#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
5639 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
5640
5641#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
5642 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
5643
5644/** Emits code for reversing the byte order in a local value. */
5645DECL_INLINE_THROW(uint32_t)
5646iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
5647{
5648 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5649 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5650
5651 switch (cbLocal)
5652 {
5653 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
5654 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
5655 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
5656 default: AssertFailedBreak();
5657 }
5658
5659 iemNativeVarRegisterRelease(pReNative, idxVar);
5660 return off;
5661}
5662
5663
5664#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
5665 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5666
5667#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
5668 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5669
5670#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
5671 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5672
5673/** Emits code for shifting left a local value. */
5674DECL_INLINE_THROW(uint32_t)
5675iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5676{
5677#ifdef VBOX_STRICT
5678 switch (cbLocal)
5679 {
5680 case sizeof(uint8_t): Assert(cShift < 8); break;
5681 case sizeof(uint16_t): Assert(cShift < 16); break;
5682 case sizeof(uint32_t): Assert(cShift < 32); break;
5683 case sizeof(uint64_t): Assert(cShift < 64); break;
5684 default: AssertFailedBreak();
5685 }
5686#endif
5687
5688 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5689 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5690
5691 if (cbLocal <= sizeof(uint32_t))
5692 {
5693 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
5694 if (cbLocal < sizeof(uint32_t))
5695 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
5696 cbLocal == sizeof(uint16_t)
5697 ? UINT32_C(0xffff)
5698 : UINT32_C(0xff));
5699 }
5700 else
5701 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
5702
5703 iemNativeVarRegisterRelease(pReNative, idxVar);
5704 return off;
5705}
5706
5707
5708#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
5709 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5710
5711#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
5712 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5713
5714#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
5715 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5716
5717/** Emits code for shifting left a local value. */
5718DECL_INLINE_THROW(uint32_t)
5719iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5720{
5721#ifdef VBOX_STRICT
5722 switch (cbLocal)
5723 {
5724 case sizeof(int8_t): Assert(cShift < 8); break;
5725 case sizeof(int16_t): Assert(cShift < 16); break;
5726 case sizeof(int32_t): Assert(cShift < 32); break;
5727 case sizeof(int64_t): Assert(cShift < 64); break;
5728 default: AssertFailedBreak();
5729 }
5730#endif
5731
5732 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5733 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5734
5735 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
5736 if (cbLocal == sizeof(uint8_t))
5737 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
5738 else if (cbLocal == sizeof(uint16_t))
5739 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
5740
5741 if (cbLocal <= sizeof(uint32_t))
5742 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
5743 else
5744 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
5745
5746 iemNativeVarRegisterRelease(pReNative, idxVar);
5747 return off;
5748}
5749
5750
5751#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
5752 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
5753
5754#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
5755 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
5756
5757#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
5758 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
5759
5760/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
5761DECL_INLINE_THROW(uint32_t)
5762iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
5763{
5764 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
5765 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
5766 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5767 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5768
5769 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5770 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquireInited(pReNative, idxVarEffAddr, &off);
5771
5772 /* Need to sign extend the value. */
5773 if (cbLocal <= sizeof(uint32_t))
5774 {
5775/** @todo ARM64: In case of boredone, the extended add instruction can do the
5776 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5777 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5778
5779 switch (cbLocal)
5780 {
5781 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5782 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5783 default: AssertFailed();
5784 }
5785
5786 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5787 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5788 }
5789 else
5790 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5791
5792 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5793 iemNativeVarRegisterRelease(pReNative, idxVar);
5794 return off;
5795}
5796
5797
5798
5799/*********************************************************************************************************************************
5800* EFLAGS *
5801*********************************************************************************************************************************/
5802
/*
 * IEMNATIVE_EFLAGS_OPTIMIZATION_STATS:
 * Expands to nothing unless both statistics and liveness analysis are compiled
 * in, in which case it calls iemNativeEFlagsOptimizationStats.
 */
#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput)     ((void)0)
#else
# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
    iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)

/**
 * Updates the per status flag liveness statistics for the current call.
 *
 * For each of CF, PF, AF, ZF, SF and OF present in @a fEflOutput, one of the
 * matching pVCpu->iem.s.StatNativeLivenessEflXx counters is incremented,
 * selected by the liveness entry of the current call
 * (pReNative->paLivenessEntries[pReNative->idxCurCall]).
 *
 * @param   pReNative   The recompiler state.
 * @param   fEflInput   The EFLAGS input mask (not used).
 * @param   fEflOutput  The EFLAGS output mask; nothing is counted when zero.
 */
DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
{
    RT_NOREF(fEflInput);
    if (!fEflOutput)
        return;

    PVMCPUCC const pVCpu = pReNative->pVCpu;

# ifndef IEMLIVENESS_EXTENDED_LAYOUT
    /* Compact layout: a flag is 'required' if either state bit is set,
       otherwise it is 'skippable'. */
    IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
    IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
    AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
#  define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
        if (fEflOutput & (a_fEfl)) \
        { \
            if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
                STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
            else \
                STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
        } else do { } while (0)
# else
    /* Extended layout: distinguishes 'skippable' (will be clobbered),
       'delayable' (can be postponed) and 'required'. */
    PCIEMLIVENESSENTRY const pLivenessEntry    = &pReNative->paLivenessEntries[pReNative->idxCurCall];
    IEMLIVENESSBIT const     LivenessClobbered = { IEMLIVENESS_STATE_GET_WILL_BE_CLOBBERED_SET(pLivenessEntry) };
    IEMLIVENESSBIT const     LivenessDelayable = { IEMLIVENESS_STATE_GET_CAN_BE_POSTPONED_SET(pLivenessEntry) };
#  define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
        if (fEflOutput & (a_fEfl)) \
        { \
            if (LivenessClobbered.a_fLivenessMember) \
                STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
            else if (LivenessDelayable.a_fLivenessMember) \
                STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
            else \
                STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
        } else do { } while (0)
# endif

    CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
    CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
    CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
    CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
    CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
    CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
    //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
# undef CHECK_FLAG_AND_UPDATE_STATS
}
#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5853
5854#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5855#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5856 off = iemNativeEmitFetchEFlags<a_fEflInput, iemNativeEflagsToLivenessMask<a_fEflInput>(),\
5857 a_fEflOutput, iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags)
5858
5859/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5860template<uint32_t const a_fEflInput, uint64_t const a_fLivenessEflInput,
5861 uint32_t const a_fEflOutput, uint64_t const a_fLivenessEflOutput>
5862DECL_INLINE_THROW(uint32_t)
5863iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
5864{
5865 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5866 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5867 /** @todo fix NOT AssertCompile(a_fEflInput != 0 || a_fEflOutput != 0); */
5868
5869#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5870# ifdef VBOX_STRICT
5871 if ( pReNative->idxCurCall != 0
5872 && (a_fEflInput != 0 || a_fEflOutput != 0) /* for NOT these are both zero for now. */)
5873 {
5874 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5875 RT_CONSTEXPR uint32_t const fBoth = a_fEflInput | a_fEflOutput;
5876# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5877 AssertMsg( !(fBoth & (a_fElfConst)) \
5878 || (!(a_fEflInput & (a_fElfConst)) \
5879 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5880 : !(a_fEflOutput & (a_fElfConst)) \
5881 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5882 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5883 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5884 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5885 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5886 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5887 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5888 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5889 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5890 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5891# undef ASSERT_ONE_EFL
5892 }
5893# endif
5894#endif
5895
5896 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, a_fEflInput);
5897 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, a_fEflInput);
5898
5899 /** @todo This could be prettier...*/
5900 /** @todo Also, the shadowing+liveness handling of EFlags is currently
5901 * problematic, but I'll try tackle that soon (@bugref{10720}). */
5902 PCIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarEFlags)];
5903 Assert(pVar->enmKind == kIemNativeVarKind_Invalid || pVar->enmKind == kIemNativeVarKind_Stack);
5904 Assert(pVar->idxReg == UINT8_MAX);
5905 if (pVar->uArgNo >= IEMNATIVE_CALL_ARG_GREG_COUNT)
5906 {
5907 /** @todo We could use kIemNativeGstRegUse_ReadOnly here when fOutput is
5908 * zero, but since iemNativeVarRegisterSet clears the shadowing,
5909 * that's counter productive... */
5910 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
5911 a_fLivenessEflInput, a_fLivenessEflOutput);
5912 iemNativeVarRegisterSet(pReNative, idxVarEFlags, idxGstReg, off, true /*fAllocated*/);
5913 }
5914 else
5915 {
5916 /* Register argument variable: Avoid assertions in generic call code and load it the traditional way. */
5917 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off);
5918 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent(pReNative, &off,
5919 a_fLivenessEflInput, a_fLivenessEflOutput);
5920 if (idxGstReg != UINT8_MAX)
5921 {
5922 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstReg);
5923 iemNativeRegFreeTmp(pReNative, idxGstReg);
5924 }
5925 else
5926 off = iemNativeEmitLoadGprWithGstRegT<kIemNativeGstReg_EFlags>(pReNative, off, idxVarReg);
5927 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5928 }
5929 return off;
5930}
5931
5932
5933
5934/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5935 * start using it with custom native code emission (inlining assembly
5936 * instruction helpers). */
5937#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5938#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5939 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5940 off = iemNativeEmitCommitEFlags<true /*a_fUpdateSkippingAndPostponing*/, a_fEflOutput, \
5941 iemNativeEflagsToLivenessMask<a_fEflInput>(), \
5942 iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags, a_fEflInput)
5943
5944#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5945#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5946 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5947 off = iemNativeEmitCommitEFlags<false /*a_fUpdateSkippingAndPostponing*/, a_fEflOutput, \
5948 iemNativeEflagsToLivenessMask<a_fEflInput>(), \
5949 iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags, a_fEflInput)
5950
5951/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5952template<bool const a_fUpdateSkippingAndPostponing, uint32_t const a_fEflOutput,
5953 uint64_t const a_fLivenessEflInputBits, uint64_t const a_fLivenessEflOutputBits>
5954DECL_INLINE_THROW(uint32_t)
5955iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fElfInput)
5956{
5957 uint8_t const idxReg = iemNativeVarRegisterAcquireInited(pReNative, idxVarEFlags, &off);
5958 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5959
5960#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5961# ifdef VBOX_STRICT
5962 if ( pReNative->idxCurCall != 0
5963 && (a_fLivenessEflInputBits != 0 || a_fLivenessEflOutputBits != 0) /* for NOT these are both zero for now. */)
5964 {
5965 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5966# define ASSERT_ONE_EFL(a_idxField) \
5967 if RT_CONSTEXPR_IF(((a_fLivenessEflInputBits | a_fLivenessEflOutputBits) & RT_BIT_64(a_idxField)) != 0) \
5968 AssertMsg(!(a_fLivenessEflInputBits & RT_BIT_64(a_idxField)) \
5969 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5970 : !(a_fLivenessEflOutputBits & RT_BIT_64(a_idxField)) \
5971 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5972 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)), \
5973 ("%s - %u\n", #a_idxField, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5974 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
5975 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
5976 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
5977 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
5978 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
5979 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
5980 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
5981# undef ASSERT_ONE_EFL
5982 }
5983# endif
5984#endif
5985
5986#ifdef VBOX_STRICT
5987 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5988 uint32_t offFixup = off;
5989 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5990 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5991 iemNativeFixupFixedJump(pReNative, offFixup, off);
5992
5993 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5994 offFixup = off;
5995 off = iemNativeEmitJzToFixed(pReNative, off, off);
5996 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
5997 iemNativeFixupFixedJump(pReNative, offFixup, off);
5998
5999 /** @todo validate that only bits in the a_fEflOutput mask changed. */
6000#endif
6001
6002#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6003 if RT_CONSTEXPR_IF(a_fUpdateSkippingAndPostponing)
6004 {
6005 Assert(!(pReNative->fSkippingEFlags & fElfInput)); RT_NOREF(fElfInput);
6006 if (pReNative->fSkippingEFlags)
6007 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitCommitEFlags)\n",
6008 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~(a_fEflOutput & X86_EFL_STATUS_BITS) ));
6009 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6010 pReNative->fSkippingEFlags = 0;
6011 else
6012 pReNative->fSkippingEFlags &= ~(a_fEflOutput & X86_EFL_STATUS_BITS);
6013# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6014 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6015 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6016 else
6017 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(a_fEflOutput & X86_EFL_STATUS_BITS),
6018 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6019# endif
6020 IEMNATIVE_CLEAR_POSTPONED_EFLAGS(pReNative, a_fEflOutput);
6021 }
6022#endif
6023
6024 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
6025 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_EFlags>(pReNative, off, idxReg);
6026 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
6027 return off;
6028}
6029
6030
6031typedef enum IEMNATIVEMITEFLOP
6032{
6033 kIemNativeEmitEflOp_Set,
6034 kIemNativeEmitEflOp_Clear,
6035 kIemNativeEmitEflOp_Flip
6036} IEMNATIVEMITEFLOP;
6037
6038#define IEM_MC_SET_EFL_BIT(a_fBit) \
6039 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Set, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6040
6041#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
6042 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Clear, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6043
6044#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
6045 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Flip, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6046
6047/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
6048template<IEMNATIVEMITEFLOP const a_enmOp, uint32_t const a_fEflBit, uint64_t const a_fLivenessEflBit>
6049DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6050{
6051 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
6052 a_enmOp == kIemNativeEmitEflOp_Flip
6053 ? a_fLivenessEflBit : 0,
6054 a_fLivenessEflBit);
6055
6056 /* Using 'if constexpr' forces code elimination in debug builds with VC. */
6057 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Set)
6058 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, a_fEflBit);
6059 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Clear)
6060 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~a_fEflBit);
6061 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Flip)
6062 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, a_fEflBit);
6063 else
6064 AssertCompile( a_enmOp == kIemNativeEmitEflOp_Set /* AssertCompile(false) works with VC 2019 but not clang 15. */
6065 || a_enmOp == kIemNativeEmitEflOp_Clear
6066 || a_enmOp == kIemNativeEmitEflOp_Flip);
6067
6068 /** @todo No delayed writeback for EFLAGS right now. */
6069 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_EFlags>(pReNative, off, idxEflReg);
6070
6071 /* Free but don't flush the EFLAGS register. */
6072 iemNativeRegFreeTmp(pReNative, idxEflReg);
6073
6074#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6075 /* Clear the bit in the skipped mask if we're clobbering and it's a status bit. */
6076 if RT_CONSTEXPR_IF( (a_enmOp == kIemNativeEmitEflOp_Set || a_enmOp == kIemNativeEmitEflOp_Clear)
6077 && (a_fEflBit & X86_EFL_STATUS_BITS))
6078 {
6079 if (pReNative->fSkippingEFlags)
6080 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitModifyEFlagsBit)\n",
6081 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~a_fEflBit ));
6082 pReNative->fSkippingEFlags &= ~a_fEflBit;
6083# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6084 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~a_fEflBit, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6085# endif
6086 }
6087#endif
6088
6089 return off;
6090}
6091
6092
6093/*********************************************************************************************************************************
6094* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
6095*********************************************************************************************************************************/
6096
6097#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
6098 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
6099
6100#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
6101 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
6102
6103#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
6104 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
6105
6106
6107/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
6108 * IEM_MC_FETCH_SREG_ZX_U64. */
6109DECL_INLINE_THROW(uint32_t)
6110iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
6111{
6112 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6113 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
6114 Assert(iSReg < X86_SREG_COUNT);
6115
6116 /*
6117 * For now, we will not create a shadow copy of a selector. The rational
6118 * is that since we do not recompile the popping and loading of segment
6119 * registers and that the the IEM_MC_FETCH_SREG_U* MCs are only used for
6120 * pushing and moving to registers, there is only a small chance that the
6121 * shadow copy will be accessed again before the register is reloaded. One
6122 * scenario would be nested called in 16-bit code, but I doubt it's worth
6123 * the extra register pressure atm.
6124 *
6125 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
6126 * and iemNativeVarRegisterAcquire for a load scenario. We only got the
6127 * store scencario covered at present (r160730).
6128 */
6129 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6130 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6131 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
6132 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6133 return off;
6134}
6135
6136
6137
6138/*********************************************************************************************************************************
6139* Register references. *
6140*********************************************************************************************************************************/
6141
6142#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
6143 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
6144
6145#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
6146 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
6147
6148/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
6149DECL_INLINE_THROW(uint32_t)
6150iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
6151{
6152 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
6153 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6154 Assert(iGRegEx < 20);
6155
6156 if (iGRegEx < 16)
6157 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6158 else
6159 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
6160
6161 /* If we've delayed writing back the register value, flush it now. */
6162 off = iemNativeRegFlushPendingSpecificWrite<kIemNativeGstRegRef_Gpr>(pReNative, off, iGRegEx & 15);
6163
6164 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6165 if (!fConst)
6166 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
6167
6168 return off;
6169}
6170
6171#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
6172 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
6173
6174#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
6175 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
6176
6177#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
6178 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
6179
6180#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
6181 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
6182
6183#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
6184 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
6185
6186#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
6187 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
6188
6189#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
6190 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
6191
6192#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
6193 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
6194
6195#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
6196 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
6197
6198#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
6199 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
6200
6201/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
6202DECL_INLINE_THROW(uint32_t)
6203iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
6204{
6205 Assert(iGReg < 16);
6206 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
6207 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6208
6209 /* If we've delayed writing back the register value, flush it now. */
6210 off = iemNativeRegFlushPendingSpecificWrite<kIemNativeGstRegRef_Gpr>(pReNative, off, iGReg);
6211
6212 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6213 if (!fConst)
6214 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
6215
6216 return off;
6217}
6218
6219
/*
 * EFLAGS references.  Only the _EX form is provided here; the plain
 * IEM_MC_REF_EFLAGS is undefined on purpose.
 *
 * Note that IEM_MC_REF_EFLAGS_EX expands to two statements (the statistics
 * macro followed by the emitter call) and that a_fEflOutput is used as a
 * template argument, so it must be a compile time constant.
 */
6220#undef IEM_MC_REF_EFLAGS /* should not be used. */
6221#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
6222 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
6223 off = iemNativeEmitRefEFlags<a_fEflOutput>(pReNative, off, a_pEFlags, a_fEflInput)
6224
6225/** Handles IEM_MC_REF_EFLAGS. */
6226template<uint32_t const a_fEflOutput>
6227DECL_INLINE_THROW(uint32_t)
6228iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput)
6229{
6230 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
6231 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6232
6233#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6234 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fEflInput);
6235 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
6236 if (pReNative->fSkippingEFlags)
6237 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitRefEFlags)\n",
6238 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~a_fEflOutput ));
6239 pReNative->fSkippingEFlags &= ~a_fEflOutput;
6240# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6241
6242 /* Updating the skipping according to the outputs is a little early, but
6243 we don't have any other hooks for references atm. */
6244 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6245 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6246 else if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) != 0)
6247 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(a_fEflOutput & X86_EFL_STATUS_BITS),
6248 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6249# endif
6250
6251 /* This ASSUMES that EFLAGS references are not taken before use. */
6252 IEMNATIVE_CLEAR_POSTPONED_EFLAGS(pReNative, a_fEflOutput);
6253
6254#endif
6255 RT_NOREF(fEflInput);
6256
6257 /* If we've delayed writing back the register value, flush it now. */
6258 off = iemNativeRegFlushPendingSpecificWrite<kIemNativeGstRegRef_EFlags>(pReNative, off, 0);
6259
6260 /* If there is a shadow copy of guest EFLAGS, flush it now. */
6261 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
6262
6263 return off;
6264}
6265
6266
6267/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
6268 * different code from threaded recompiler, maybe it would be helpful. For now
6269 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
/* Currently expands to a no-op expression; both arguments are ignored. */
6270#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
6271
6272
/*
 * XMM register references.  All IEM_MC_REF_XREG_xxx variants expand to a call
 * to iemNativeEmitRefXregXxx(), which takes no element type or width
 * argument; the variants only differ in the fConst flag (true for the _CONST
 * forms).  The expansion assigns to 'off' and uses 'pReNative'.
 */
6273#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
6274 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
6275
6276#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
6277 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
6278
6279#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
6280 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
6281
6282#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
6283 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
6284
6285/* Just being paranoid here. */
/* The U32/U64/R32/R64 reference macros further down reuse the same worker as
   the whole-register ones, so check at compile time that the au64, au32, ar64
   and ar32 views all start at offset 0 of X86XMMREG. */
6286#ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
6287AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
6288AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
6289AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
6290AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
6291#endif
/* Reduced version that all compilers accept. */
6292AssertCompileMemberOffset(X86XMMREG, au64, 0);
6293AssertCompileMemberOffset(X86XMMREG, au32, 0);
6294AssertCompileMemberOffset(X86XMMREG, ar64, 0);
6295AssertCompileMemberOffset(X86XMMREG, ar32, 0);
6296
/* Read-only references to the first element of an XMM register. */
6297#define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
6298 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
6299#define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
6300 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
6301#define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
6302 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
6303#define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
6304 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
6305
6306/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
6307DECL_INLINE_THROW(uint32_t)
6308iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
6309{
6310 Assert(iXReg < 16);
6311 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
6312 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6313
6314 /* If we've delayed writing back the register value, flush it now. */
6315 off = iemNativeRegFlushPendingSpecificWrite<kIemNativeGstRegRef_XReg>(pReNative, off, iXReg);
6316
6317 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6318 if (!fConst)
6319 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
6320
6321 return off;
6322}
6323
6324
6325
6326/*********************************************************************************************************************************
6327* Effective Address Calculation *
6328*********************************************************************************************************************************/
/* Note that the result variable (a_GCPtrEff) comes first in the macro but is
   passed last to the emitter.  The expansion assigns to 'off' and uses
   'pReNative'. */
6329#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
6330 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
6331
6332/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
 *
 * Decodes a ModR/M byte using 16-bit addressing.  For the disp16 form without
 * registers the result variable is turned into a constant and no code is
 * emitted.  Otherwise host code is emitted that calculates
 * (uint16_t)(displacement + base [+ index]) into the host register of the
 * result variable.
 *
 * @returns New code buffer offset.
 * @param   pReNative   The native recompiler state.
 * @param   off         The current code buffer offset.
 * @param   bRm         The ModR/M byte; only the mod and r/m fields are used.
 * @param   u16Disp     The displacement.  With mod=1 only the low byte is
 *                      used (sign extended); with mod=0 it is ignored unless
 *                      r/m=6 (the disp16 only form).
 * @param   idxVarRet   The result variable index.
 *
6333 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
6334DECL_INLINE_THROW(uint32_t)
6335iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6336 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
6337{
6338 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6339
6340 /*
6341 * Handle the disp16 form with no registers first.
6342 *
6343 * Convert to an immediate value, as that'll delay the register allocation
6344 * and assignment till the memory access / call / whatever and we can use
6345 * a more appropriate register (or none at all).
6346 */
 /* mod=0 and r/m=6 is the disp16 only encoding. */
6347 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
6348 {
6349 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
6350 return off;
6351 }
6352
6353 /* Determine the displacement. */
6354 uint16_t u16EffAddr;
6355 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6356 {
6357 case 0: u16EffAddr = 0; break;
 /* mod=1: disp8, sign extended to 16 bits. */
6358 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
6359 case 2: u16EffAddr = u16Disp; break;
 /* mod=3 is not a memory form and is not expected here. */
6360 default: AssertFailedStmt(u16EffAddr = 0);
6361 }
6362
6363 /* Determine the registers involved. */
 /* An index of UINT8_MAX means there is no index register. */
6364 uint8_t idxGstRegBase;
6365 uint8_t idxGstRegIndex;
6366 switch (bRm & X86_MODRM_RM_MASK)
6367 {
6368 case 0:
6369 idxGstRegBase = X86_GREG_xBX;
6370 idxGstRegIndex = X86_GREG_xSI;
6371 break;
6372 case 1:
6373 idxGstRegBase = X86_GREG_xBX;
6374 idxGstRegIndex = X86_GREG_xDI;
6375 break;
6376 case 2:
6377 idxGstRegBase = X86_GREG_xBP;
6378 idxGstRegIndex = X86_GREG_xSI;
6379 break;
6380 case 3:
6381 idxGstRegBase = X86_GREG_xBP;
6382 idxGstRegIndex = X86_GREG_xDI;
6383 break;
6384 case 4:
6385 idxGstRegBase = X86_GREG_xSI;
6386 idxGstRegIndex = UINT8_MAX;
6387 break;
6388 case 5:
6389 idxGstRegBase = X86_GREG_xDI;
6390 idxGstRegIndex = UINT8_MAX;
6391 break;
 /* r/m=6 with mod=0 was handled at the top, so this is always BP based. */
6392 case 6:
6393 idxGstRegBase = X86_GREG_xBP;
6394 idxGstRegIndex = UINT8_MAX;
6395 break;
6396#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
6397 default:
6398#endif
6399 case 7:
6400 idxGstRegBase = X86_GREG_xBX;
6401 idxGstRegIndex = UINT8_MAX;
6402 break;
6403 }
6404
6405 /*
6406 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
6407 */
 /* From here on idxRegBase and idxRegIndex are host register numbers. */
6408 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6409 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6410 kIemNativeGstRegUse_ReadOnly);
6411 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
6412 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6413 kIemNativeGstRegUse_ReadOnly)
6414 : UINT8_MAX;
6415#ifdef RT_ARCH_AMD64
6416 if (idxRegIndex == UINT8_MAX)
6417 {
6418 if (u16EffAddr == 0)
6419 {
6420 /* movzx ret, base */
6421 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
6422 }
6423 else
6424 {
6425 /* lea ret32, [base64 + disp32] */
6426 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
 /* Worst case: REX + opcode + ModR/M + SIB + disp32 = 8 bytes. */
6427 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6428 if (idxRegRet >= 8 || idxRegBase >= 8)
6429 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6430 pbCodeBuf[off++] = 0x8d;
 /* An r/m field of 4 selects a SIB byte, so r12 as base has to be encoded via SIB. */
6431 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6432 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
6433 else
6434 {
6435 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
6436 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6437 }
 /* The 16-bit displacement is emitted zero extended as a disp32. */
6438 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6439 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6440 pbCodeBuf[off++] = 0;
6441 pbCodeBuf[off++] = 0;
6442 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6443
 /* Truncate the sum to 16 bits. */
6444 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6445 }
6446 }
6447 else
6448 {
6449 /* lea ret32, [index64 + base64 (+ disp32)] */
6450 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6451 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6452 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6453 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6454 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6455 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6456 pbCodeBuf[off++] = 0x8d;
 /* mod=0 with a SIB base field of 5 means disp32 without base register, so
 a host base register with low bits 5 always gets a displacement. */
6457 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
6458 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6459 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
6460 if (bMod == X86_MOD_MEM4)
6461 {
6462 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6463 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6464 pbCodeBuf[off++] = 0;
6465 pbCodeBuf[off++] = 0;
6466 }
6467 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 /* Truncate the sum to 16 bits. */
6468 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6469 }
6470
6471#elif defined(RT_ARCH_ARM64)
 /* At most four instructions are emitted below (movz + add + add + uxth). */
6472 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6473 if (u16EffAddr == 0)
6474 {
6475 if (idxRegIndex == UINT8_MAX)
6476 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
6477 else
6478 {
6479 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
6480 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6481 }
6482 }
6483 else
6484 {
 /* Small displacements go into an add/sub immediate; anything else is
 loaded into the result register first and the base added to it. */
6485 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
6486 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
6487 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
6488 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6489 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
6490 else
6491 {
6492 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
6493 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6494 }
6495 if (idxRegIndex != UINT8_MAX)
6496 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
 /* Truncate the sum to 16 bits. */
6497 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6498 }
6499
6500#else
6501# error "port me"
6502#endif
6503
 /* Release the temporary host registers and the result variable register. */
6504 if (idxRegIndex != UINT8_MAX)
6505 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6506 iemNativeRegFreeTmp(pReNative, idxRegBase);
6507 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6508 return off;
6509}
6510
6511
/* Note that the result variable (a_GCPtrEff) comes first in the macro but is
   passed last to the emitter.  The expansion assigns to 'off' and uses
   'pReNative'. */
6512#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
6513 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
6514
6515/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
 *
 * Decodes a ModR/M byte (and SIB byte, if any) using 32-bit addressing.  When
 * no registers are involved the result variable is turned into a constant and
 * no code is emitted.  Otherwise host code is emitted that calculates
 * (uint32_t)(displacement [+ base] [+ (index << scale)]) into the host
 * register of the result variable.
 *
 * @returns New code buffer offset.
 * @param   pReNative           The native recompiler state.
 * @param   off                 The current code buffer offset.
 * @param   bRm                 The ModR/M byte; only the mod and r/m fields
 *                              are used.
 * @param   uSibAndRspOffset    The low 8 bits are decoded as the SIB byte
 *                              (only used when r/m=4).  The value shifted
 *                              right by 8 is added to the displacement when
 *                              the SIB base is 4 (xSP).
 * @param   u32Disp             The displacement.  With mod=1 only the low
 *                              byte is used (sign extended).  With mod=0 it
 *                              is only used by the disp32 forms (r/m=5 or
 *                              SIB base=5).
 * @param   idxVarRet           The result variable index.
 *
6516 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
6517DECL_INLINE_THROW(uint32_t)
6518iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6519 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
6520{
6521 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6522
6523 /*
6524 * Handle the disp32 form with no registers first.
6525 *
6526 * Convert to an immediate value, as that'll delay the register allocation
6527 * and assignment till the memory access / call / whatever and we can use
6528 * a more appropriate register (or none at all).
6529 */
 /* mod=0 and r/m=5 is the disp32 only encoding. */
6530 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6531 {
6532 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
6533 return off;
6534 }
6535
6536 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
6537 uint32_t u32EffAddr = 0;
6538 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6539 {
6540 case 0: break;
 /* mod=1: disp8, sign extended to 32 bits. */
6541 case 1: u32EffAddr = (int8_t)u32Disp; break;
6542 case 2: u32EffAddr = u32Disp; break;
 /* mod=3 is not a memory form and is not expected here. */
6543 default: AssertFailed();
6544 }
6545
6546 /* Get the register (or SIB) value. */
 /* UINT8_MAX means 'no register' for both base and index. */
6547 uint8_t idxGstRegBase = UINT8_MAX;
6548 uint8_t idxGstRegIndex = UINT8_MAX;
6549 uint8_t cShiftIndex = 0;
6550 switch (bRm & X86_MODRM_RM_MASK)
6551 {
6552 case 0: idxGstRegBase = X86_GREG_xAX; break;
6553 case 1: idxGstRegBase = X86_GREG_xCX; break;
6554 case 2: idxGstRegBase = X86_GREG_xDX; break;
6555 case 3: idxGstRegBase = X86_GREG_xBX; break;
6556 case 4: /* SIB */
6557 {
6558 /* index /w scaling . */
6559 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6560 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6561 {
6562 case 0: idxGstRegIndex = X86_GREG_xAX; break;
6563 case 1: idxGstRegIndex = X86_GREG_xCX; break;
6564 case 2: idxGstRegIndex = X86_GREG_xDX; break;
6565 case 3: idxGstRegIndex = X86_GREG_xBX; break;
6566 case 4: cShiftIndex = 0; /*no index*/ break;
6567 case 5: idxGstRegIndex = X86_GREG_xBP; break;
6568 case 6: idxGstRegIndex = X86_GREG_xSI; break;
6569 case 7: idxGstRegIndex = X86_GREG_xDI; break;
6570 }
6571
6572 /* base */
6573 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
6574 {
6575 case 0: idxGstRegBase = X86_GREG_xAX; break;
6576 case 1: idxGstRegBase = X86_GREG_xCX; break;
6577 case 2: idxGstRegBase = X86_GREG_xDX; break;
6578 case 3: idxGstRegBase = X86_GREG_xBX; break;
 /* xSP as base: fold in the extra offset carried above the SIB byte. */
6579 case 4:
6580 idxGstRegBase = X86_GREG_xSP;
6581 u32EffAddr += uSibAndRspOffset >> 8;
6582 break;
 /* Base 5 is xBP unless mod=0, in which case it means disp32 without a base. */
6583 case 5:
6584 if ((bRm & X86_MODRM_MOD_MASK) != 0)
6585 idxGstRegBase = X86_GREG_xBP;
6586 else
6587 {
6588 Assert(u32EffAddr == 0);
6589 u32EffAddr = u32Disp;
6590 }
6591 break;
6592 case 6: idxGstRegBase = X86_GREG_xSI; break;
6593 case 7: idxGstRegBase = X86_GREG_xDI; break;
6594 }
6595 break;
6596 }
6597 case 5: idxGstRegBase = X86_GREG_xBP; break;
6598 case 6: idxGstRegBase = X86_GREG_xSI; break;
6599 case 7: idxGstRegBase = X86_GREG_xDI; break;
6600 }
6601
6602 /*
6603 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6604 * the start of the function.
6605 */
6606 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6607 {
6608 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
6609 return off;
6610 }
6611
6612 /*
6613 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6614 */
 /* From here on idxRegBase and idxRegIndex are host register numbers. */
6615 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6616 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6617 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6618 kIemNativeGstRegUse_ReadOnly);
6619 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6620 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6621 kIemNativeGstRegUse_ReadOnly);
6622
6623 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6624 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6625 {
6626 idxRegBase = idxRegIndex;
6627 idxRegIndex = UINT8_MAX;
6628 }
 /* After the swap a missing base implies an index with a non-zero shift. */
6629
6630#ifdef RT_ARCH_AMD64
6631 if (idxRegIndex == UINT8_MAX)
6632 {
6633 if (u32EffAddr == 0)
6634 {
6635 /* mov ret, base */
6636 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6637 }
6638 else
6639 {
6640 /* lea ret32, [base64 + disp32] */
6641 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
 /* Worst case: REX + opcode + ModR/M + SIB + disp32 = 8 bytes. */
6642 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6643 if (idxRegRet >= 8 || idxRegBase >= 8)
6644 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6645 pbCodeBuf[off++] = 0x8d;
 /* Use the short disp8 form when the displacement fits a signed byte. */
6646 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
 /* An r/m field of 4 selects a SIB byte, so r12 as base has to be encoded via SIB. */
6647 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6648 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6649 else
6650 {
6651 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6652 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6653 }
6654 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6655 if (bMod == X86_MOD_MEM4)
6656 {
6657 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6658 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6659 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6660 }
6661 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6662 }
6663 }
6664 else
6665 {
6666 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6667 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6668 if (idxRegBase == UINT8_MAX)
6669 {
6670 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
6671 if (idxRegRet >= 8 || idxRegIndex >= 8)
6672 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6673 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6674 pbCodeBuf[off++] = 0x8d;
 /* mod=0 with SIB base=5 is the 'no base' form; it always takes a disp32. */
6675 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6676 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6677 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6678 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6679 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6680 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6681 }
6682 else
6683 {
6684 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6685 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6686 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6687 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6688 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6689 pbCodeBuf[off++] = 0x8d;
 /* No displacement is only possible when the base's low bits are not 5,
 since mod=0 with SIB base=5 means disp32 without a base register. */
6690 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6691 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6692 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6693 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6694 if (bMod != X86_MOD_MEM0)
6695 {
6696 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6697 if (bMod == X86_MOD_MEM4)
6698 {
6699 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6700 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6701 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6702 }
6703 }
6704 }
6705 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6706 }
6707
6708#elif defined(RT_ARCH_ARM64)
 /* All arithmetic below is done with f64Bit=false, i.e. as 32-bit operations. */
6709 if (u32EffAddr == 0)
6710 {
6711 if (idxRegIndex == UINT8_MAX)
6712 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6713 else if (idxRegBase == UINT8_MAX)
6714 {
 /* The swap above turns a shift-less lone index into a base, so the
 cShiftIndex == 0 case is not expected to be taken here. */
6715 if (cShiftIndex == 0)
6716 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
6717 else
6718 {
6719 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6720 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
6721 }
6722 }
6723 else
6724 {
6725 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6726 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6727 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6728 }
6729 }
6730 else
6731 {
 /* With a base register, small displacements go into an add/sub immediate;
 otherwise the displacement is loaded first and the base (if any) added. */
6732 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
6733 {
6734 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6735 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
6736 }
6737 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
6738 {
6739 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6740 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6741 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
6742 }
6743 else
6744 {
6745 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
6746 if (idxRegBase != UINT8_MAX)
6747 {
6748 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6749 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6750 }
6751 }
 /* Finally add the (shifted) index, if any. */
6752 if (idxRegIndex != UINT8_MAX)
6753 {
6754 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6755 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6756 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6757 }
6758 }
6759
6760#else
6761# error "port me"
6762#endif
6763
 /* Release the temporary host registers and the result variable register. */
6764 if (idxRegIndex != UINT8_MAX)
6765 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6766 if (idxRegBase != UINT8_MAX)
6767 iemNativeRegFreeTmp(pReNative, idxRegBase);
6768 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6769 return off;
6770}
6771
6772
/*
 * 64-bit mode effective address calculation.  All three variants expand to a
 * call to iemNativeEmitCalcRmEffAddrThreadedAddr64().  The plain and _FSGS
 * forms expand identically (f64Bit=true), while the _ADDR32 form passes
 * f64Bit=false.
 *
 * Note that a_cbImm is passed in the position of the emitter's cbInstr
 * parameter and that the result variable (a_GCPtrEff) comes first in the
 * macro but second to last in the call.
 */
6773#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6774 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6775 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6776
6777#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6778 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6779 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6780
6781#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6782 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6783 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
6784
6785/**
6786 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
6787 *
6788 * @returns New off.
6789 * @param pReNative .
6790 * @param off .
6791 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
6792 * bit 4 to REX.X. The two bits are part of the
6793 * REG sub-field, which isn't needed in this
6794 * function.
6795 * @param uSibAndRspOffset Two parts:
6796 * - The first 8 bits make up the SIB byte.
6797 * - The next 8 bits are the fixed RSP/ESP offset
6798 * in case of a pop [xSP].
6799 * @param u32Disp The displacement byte/word/dword, if any.
6800 * @param cbInstr The size of the fully decoded instruction. Used
6801 * for RIP relative addressing.
6802 * @param idxVarRet The result variable number.
6803 * @param f64Bit Whether to use a 64-bit or 32-bit address size
6804 * when calculating the address.
6805 *
6806 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
6807 */
6808DECL_INLINE_THROW(uint32_t)
6809iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
6810 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
6811{
6812 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6813
6814 /*
6815 * Special case the rip + disp32 form first.
6816 */
6817 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6818 {
6819 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6820 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6821 kIemNativeGstRegUse_ReadOnly);
6822 if (f64Bit)
6823 {
6824#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6825 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr + (int64_t)pReNative->Core.offPc;
6826#else
6827 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
6828#endif
6829#ifdef RT_ARCH_AMD64
6830 if ((int32_t)offFinalDisp == offFinalDisp)
6831 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
6832 else
6833 {
6834 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
6835 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
6836 }
6837#else
6838 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6839#endif
6840 }
6841 else
6842 {
6843# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6844 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr + (int32_t)pReNative->Core.offPc;
6845# else
6846 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr;
6847# endif
6848 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6849 }
6850 iemNativeRegFreeTmp(pReNative, idxRegPc);
6851 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6852 return off;
6853 }
6854
6855 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
6856 int64_t i64EffAddr = 0;
6857 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6858 {
6859 case 0: break;
6860 case 1: i64EffAddr = (int8_t)u32Disp; break;
6861 case 2: i64EffAddr = (int32_t)u32Disp; break;
6862 default: AssertFailed();
6863 }
6864
6865 /* Get the register (or SIB) value. */
6866 uint8_t idxGstRegBase = UINT8_MAX;
6867 uint8_t idxGstRegIndex = UINT8_MAX;
6868 uint8_t cShiftIndex = 0;
6869 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6870 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6871 else /* SIB: */
6872 {
6873 /* index /w scaling . */
6874 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6875 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6876 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6877 if (idxGstRegIndex == 4)
6878 {
6879 /* no index */
6880 cShiftIndex = 0;
6881 idxGstRegIndex = UINT8_MAX;
6882 }
6883
6884 /* base */
6885 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6886 if (idxGstRegBase == 4)
6887 {
6888 /* pop [rsp] hack */
6889 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6890 }
6891 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6892 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6893 {
6894 /* mod=0 and base=5 -> disp32, no base reg. */
6895 Assert(i64EffAddr == 0);
6896 i64EffAddr = (int32_t)u32Disp;
6897 idxGstRegBase = UINT8_MAX;
6898 }
6899 }
6900
6901 /*
6902 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6903 * the start of the function.
6904 */
6905 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6906 {
6907 if (f64Bit)
6908 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6909 else
6910 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6911 return off;
6912 }
6913
6914 /*
6915 * Now emit code that calculates:
6916 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6917 * or if !f64Bit:
6918 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6919 */
6920 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6921 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6922 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6923 kIemNativeGstRegUse_ReadOnly);
6924 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6925 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6926 kIemNativeGstRegUse_ReadOnly);
6927
6928 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6929 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6930 {
6931 idxRegBase = idxRegIndex;
6932 idxRegIndex = UINT8_MAX;
6933 }
6934
6935#ifdef RT_ARCH_AMD64
6936 uint8_t bFinalAdj;
6937 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6938 bFinalAdj = 0; /* likely */
6939 else
6940 {
6941 /* pop [rsp] with a problematic disp32 value. Split out the
6942 RSP offset and add it separately afterwards (bFinalAdj). */
6943 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6944 Assert(idxGstRegBase == X86_GREG_xSP);
6945 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6946 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6947 Assert(bFinalAdj != 0);
6948 i64EffAddr -= bFinalAdj;
6949 Assert((int32_t)i64EffAddr == i64EffAddr);
6950 }
6951 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6952//pReNative->pInstrBuf[off++] = 0xcc;
6953
6954 if (idxRegIndex == UINT8_MAX)
6955 {
6956 if (u32EffAddr == 0)
6957 {
6958 /* mov ret, base */
6959 if (f64Bit)
6960 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6961 else
6962 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6963 }
6964 else
6965 {
6966 /* lea ret, [base + disp32] */
6967 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6968 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6969 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6970 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6971 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6972 | (f64Bit ? X86_OP_REX_W : 0);
6973 pbCodeBuf[off++] = 0x8d;
6974 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6975 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6976 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6977 else
6978 {
6979 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6980 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6981 }
6982 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6983 if (bMod == X86_MOD_MEM4)
6984 {
6985 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6986 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6987 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6988 }
6989 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6990 }
6991 }
6992 else
6993 {
6994 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6995 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6996 if (idxRegBase == UINT8_MAX)
6997 {
6998 /* lea ret, [(index64 << cShiftIndex) + disp32] */
6999 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
7000 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7001 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
7002 | (f64Bit ? X86_OP_REX_W : 0);
7003 pbCodeBuf[off++] = 0x8d;
7004 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
7005 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
7006 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7007 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7008 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7009 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7010 }
7011 else
7012 {
7013 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
7014 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
7015 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7016 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
7017 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
7018 | (f64Bit ? X86_OP_REX_W : 0);
7019 pbCodeBuf[off++] = 0x8d;
7020 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
7021 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7022 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7023 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
7024 if (bMod != X86_MOD_MEM0)
7025 {
7026 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7027 if (bMod == X86_MOD_MEM4)
7028 {
7029 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7030 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7031 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7032 }
7033 }
7034 }
7035 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7036 }
7037
7038 if (!bFinalAdj)
7039 { /* likely */ }
7040 else
7041 {
7042 Assert(f64Bit);
7043 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
7044 }
7045
7046#elif defined(RT_ARCH_ARM64)
7047 if (i64EffAddr == 0)
7048 {
7049 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7050 if (idxRegIndex == UINT8_MAX)
7051 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
7052 else if (idxRegBase != UINT8_MAX)
7053 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
7054 f64Bit, false /*fSetFlags*/, cShiftIndex);
7055 else
7056 {
7057 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
7058 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
7059 }
7060 }
7061 else
7062 {
7063 if (f64Bit)
7064 { /* likely */ }
7065 else
7066 i64EffAddr = (int32_t)i64EffAddr;
7067
7068 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
7069 {
7070 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7071 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
7072 }
7073 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
7074 {
7075 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7076 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
7077 }
7078 else
7079 {
7080 if (f64Bit)
7081 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
7082 else
7083 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
7084 if (idxRegBase != UINT8_MAX)
7085 {
7086 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7087 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
7088 }
7089 }
7090 if (idxRegIndex != UINT8_MAX)
7091 {
7092 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7093 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
7094 f64Bit, false /*fSetFlags*/, cShiftIndex);
7095 }
7096 }
7097
7098#else
7099# error "port me"
7100#endif
7101
7102 if (idxRegIndex != UINT8_MAX)
7103 iemNativeRegFreeTmp(pReNative, idxRegIndex);
7104 if (idxRegBase != UINT8_MAX)
7105 iemNativeRegFreeTmp(pReNative, idxRegBase);
7106 iemNativeVarRegisterRelease(pReNative, idxVarRet);
7107 return off;
7108}
7109
7110
7111/*********************************************************************************************************************************
7112* Memory fetches and stores common *
7113*********************************************************************************************************************************/
7114
/**
 * Memory operation selector for iemNativeEmitMemFetchStoreDataCommon.
 *
 * The zero-extending fetches are emitted with the same load helpers as the
 * plain fetch, whereas the sign-extending ones use dedicated sign-extending
 * loads (and dedicated TLB-miss helpers).
 */
typedef enum IEMNATIVEMITMEMOP
{
    /** Store the value variable to memory. */
    kIemNativeEmitMemOp_Store        = 0,
    /** Fetch a value of the access size, no extension. */
    kIemNativeEmitMemOp_Fetch        = 1,
    /** Fetch and zero extend to 16 bits. */
    kIemNativeEmitMemOp_Fetch_Zx_U16 = 2,
    /** Fetch and zero extend to 32 bits. */
    kIemNativeEmitMemOp_Fetch_Zx_U32 = 3,
    /** Fetch and zero extend to 64 bits. */
    kIemNativeEmitMemOp_Fetch_Zx_U64 = 4,
    /** Fetch and sign extend to 16 bits. */
    kIemNativeEmitMemOp_Fetch_Sx_U16 = 5,
    /** Fetch and sign extend to 32 bits. */
    kIemNativeEmitMemOp_Fetch_Sx_U32 = 6,
    /** Fetch and sign extend to 64 bits. */
    kIemNativeEmitMemOp_Fetch_Sx_U64 = 7
} IEMNATIVEMITMEMOP;
7126
7127/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
7128 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
7129 * (with iSegReg = UINT8_MAX). */
7130template<uint8_t const a_cbMem, uint32_t const a_fAlignMaskAndCtl, IEMNATIVEMITMEMOP const a_enmOp, bool a_fFlat = false>
7131DECL_INLINE_THROW(uint32_t)
7132iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
7133 uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
7134{
7135 /*
7136 * Assert sanity.
7137 */
7138 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7139 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7140 Assert( a_enmOp != kIemNativeEmitMemOp_Store
7141 || pVarValue->enmKind == kIemNativeVarKind_Immediate
7142 || pVarValue->enmKind == kIemNativeVarKind_Stack);
7143 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7144 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7145 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7146 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7147 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7148 Assert(!a_fFlat ? iSegReg < 6 : iSegReg == UINT8_MAX);
7149 AssertCompile( a_cbMem == 1 || a_cbMem == 2 || a_cbMem == 4 || a_cbMem == 8
7150 || a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U));
7151 AssertCompile(!(a_fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
7152 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7153#ifdef VBOX_STRICT
7154 if (iSegReg == UINT8_MAX)
7155 {
7156 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7157 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7158 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7159 switch (a_cbMem)
7160 {
7161 case 1:
7162 Assert( pfnFunction
7163 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
7164 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7165 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7166 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7167 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7168 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
7169 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
7170 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
7171 : UINT64_C(0xc000b000a0009000) ));
7172 Assert(!a_fAlignMaskAndCtl);
7173 break;
7174 case 2:
7175 Assert( pfnFunction
7176 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
7177 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7178 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7179 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7180 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
7181 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
7182 : UINT64_C(0xc000b000a0009000) ));
7183 Assert(a_fAlignMaskAndCtl <= 1);
7184 break;
7185 case 4:
7186 Assert( pfnFunction
7187 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
7188 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7189 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7190 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
7191 : UINT64_C(0xc000b000a0009000) ));
7192 Assert(a_fAlignMaskAndCtl <= 3);
7193 break;
7194 case 8:
7195 Assert( pfnFunction
7196 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
7197 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
7198 : UINT64_C(0xc000b000a0009000) ));
7199 Assert(a_fAlignMaskAndCtl <= 7);
7200 break;
7201 case sizeof(RTUINT128U):
7202 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7203 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
7204 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7205 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
7206 || ( a_enmOp == kIemNativeEmitMemOp_Store
7207 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7208 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
7209 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7210 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7211 ? (a_fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (a_fAlignMaskAndCtl & 0xff) == 15
7212 : a_fAlignMaskAndCtl <= 15U);
7213 break;
7214 case sizeof(RTUINT256U):
7215 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7216 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
7217 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
7218 || ( a_enmOp == kIemNativeEmitMemOp_Store
7219 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
7220 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
7221 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
7222 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
7223 ? (a_fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (a_fAlignMaskAndCtl & 0xff) == 31
7224 : a_fAlignMaskAndCtl <= 31);
7225 break;
7226 }
7227 }
7228 else
7229 {
7230 Assert(iSegReg < 6);
7231 switch (a_cbMem)
7232 {
7233 case 1:
7234 Assert( pfnFunction
7235 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
7236 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
7237 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7238 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7239 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7240 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
7241 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
7242 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
7243 : UINT64_C(0xc000b000a0009000) ));
7244 Assert(!a_fAlignMaskAndCtl);
7245 break;
7246 case 2:
7247 Assert( pfnFunction
7248 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
7249 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
7250 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7251 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7252 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
7253 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
7254 : UINT64_C(0xc000b000a0009000) ));
7255 Assert(a_fAlignMaskAndCtl <= 1);
7256 break;
7257 case 4:
7258 Assert( pfnFunction
7259 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
7260 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
7261 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
7262 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
7263 : UINT64_C(0xc000b000a0009000) ));
7264 Assert(a_fAlignMaskAndCtl <= 3);
7265 break;
7266 case 8:
7267 Assert( pfnFunction
7268 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
7269 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
7270 : UINT64_C(0xc000b000a0009000) ));
7271 Assert(a_fAlignMaskAndCtl <= 7);
7272 break;
7273 case sizeof(RTUINT128U):
7274 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7275 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
7276 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7277 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
7278 || ( a_enmOp == kIemNativeEmitMemOp_Store
7279 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7280 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
7281 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7282 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7283 ? (a_fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (a_fAlignMaskAndCtl & 0xff) == 15
7284 : a_fAlignMaskAndCtl <= 15);
7285 break;
7286 case sizeof(RTUINT256U):
7287 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7288 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
7289 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
7290 || ( a_enmOp == kIemNativeEmitMemOp_Store
7291 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
7292 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
7293 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
7294 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
7295 ? (a_fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (a_fAlignMaskAndCtl & 0xff) == 31
7296 : a_fAlignMaskAndCtl <= 31);
7297 break;
7298 }
7299 }
7300#endif
7301
7302#ifdef VBOX_STRICT
7303 /*
7304 * Check that the fExec flags we've got make sense.
7305 */
7306 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7307#endif
7308
7309 /*
7310 * To keep things simple we have to commit any pending writes first as we
7311 * may end up making calls.
7312 */
7313 /** @todo we could postpone this till we make the call and reload the
7314 * registers after returning from the call. Not sure if that's sensible or
7315 * not, though. */
7316#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7317 off = iemNativeRegFlushPendingWrites(pReNative, off);
7318#else
7319 /* The program counter is treated differently for now. */
7320 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
7321#endif
7322
7323#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7324 /*
7325 * Move/spill/flush stuff out of call-volatile registers.
7326 * This is the easy way out. We could contain this to the tlb-miss branch
7327 * by saving and restoring active stuff here.
7328 */
7329 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7330#endif
7331
7332 /*
7333 * Define labels and allocate the result register (trying for the return
7334 * register if we can).
7335 */
7336 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7337 RT_CONSTEXPR
7338 bool const fSimdRegValues = a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U);
7339 uint8_t const idxRegValueFetch = a_enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7340 : fSimdRegValues
7341 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off)
7342 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7343 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7344 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7345 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, a_fFlat, a_cbMem, offDisp);
7346 uint8_t const idxRegValueStore = a_enmOp != kIemNativeEmitMemOp_Store
7347 || TlbState.fSkip
7348 || pVarValue->enmKind == kIemNativeVarKind_Immediate
7349 ? UINT8_MAX
7350 : fSimdRegValues
7351 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7352 : iemNativeVarRegisterAcquireInited(pReNative, idxVarValue, &off);
7353 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7354 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7355 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7356 : UINT32_MAX;
7357
7358 /*
7359 * Jump to the TLB lookup code.
7360 */
7361 if (!TlbState.fSkip)
7362 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7363
7364 /*
7365 * TlbMiss:
7366 *
7367 * Call helper to do the fetching.
7368 * We flush all guest register shadow copies here.
7369 */
7370 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7371
7372#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7373 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7374#else
7375 RT_NOREF(idxInstr);
7376#endif
7377
7378#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7379 if (pReNative->Core.offPc)
7380 {
7381 /*
7382 * Update the program counter but restore it at the end of the TlbMiss branch.
7383 * This should allow delaying more program counter updates for the TlbLookup and hit paths
7384 * which are hopefully much more frequent, reducing the amount of memory accesses.
7385 */
7386 /* Allocate a temporary PC register. */
7387/** @todo r=bird: This would technically need to be done up front as it's a register allocation. */
7388 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7389 kIemNativeGstRegUse_ForUpdate);
7390
7391 /* Perform the addition and store the result. */
7392 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7393 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
7394# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
7395 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
7396# endif
7397
7398 /* Free and flush the PC register. */
7399 iemNativeRegFreeTmp(pReNative, idxPcReg);
7400 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7401 }
7402#endif
7403
7404#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7405 /* Save variables in volatile registers. */
7406 uint32_t const fHstGprsNotToSave = TlbState.getRegsNotToSave()
7407 | (idxRegMemResult < 32 ? RT_BIT_32(idxRegMemResult) : 0)
7408#ifdef _MSC_VER /* Workaround for stupid compiler (2019). */
7409 | (idxRegValueFetch < 32 && !fSimdRegValues ? RT_BIT_32(idxRegValueFetch & 0x1f) : 0);
7410#else
7411 | (idxRegValueFetch < 32 && !fSimdRegValues ? RT_BIT_32(idxRegValueFetch) : 0);
7412#endif
7413 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstGprsNotToSave);
7414#endif
7415
7416 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
7417 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
7418 if RT_CONSTEXPR_IF(fSimdRegValues)
7419 {
7420 /*
7421 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
7422 *
7423 * Note! There was a register variable assigned to the variable for the TlbLookup case above
7424 * which must not be freed or the value loaded into the register will not be synced into the register
7425 * further down the road because the variable doesn't know it had a variable assigned.
7426 *
7427 * Note! For loads it is not required to sync what is in the assigned register with the stack slot
7428 * as it will be overwritten anyway.
7429 */
7430 uint8_t const idxRegArgValue = a_fFlat ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7431 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
7432 a_enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
7433 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7434 }
7435 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitMemOp_Store)
7436 {
7437 uint8_t const idxRegArgValue = a_fFlat ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7438 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
7439#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7440 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7441#else
7442 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7443 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7444#endif
7445 }
7446
7447 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
7448 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
7449#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7450 fVolGregMask);
7451#else
7452 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
7453#endif
7454
7455 if RT_CONSTEXPR_IF(!a_fFlat)
7456 {
7457 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
7458 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7459 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
7460 }
7461
7462#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
7463 /* Do delayed EFLAGS calculations. */
7464 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitMemOp_Store || fSimdRegValues)
7465 {
7466 if RT_CONSTEXPR_IF(a_fFlat)
7467 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7468 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
7469 fHstGprsNotToSave);
7470 else
7471 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7472 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
7473 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG)>(pReNative, off, &TlbState,
7474 fHstGprsNotToSave);
7475 }
7476 else if RT_CONSTEXPR_IF(a_fFlat)
7477 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState,
7478 fHstGprsNotToSave);
7479 else
7480 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7481 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
7482 fHstGprsNotToSave);
7483#endif
7484
7485 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7486 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7487
7488 /* Done setting up parameters, make the call. */
7489 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
7490
7491 /*
7492 * Put the result in the right register if this is a fetch.
7493 */
7494 if RT_CONSTEXPR_IF(a_enmOp != kIemNativeEmitMemOp_Store)
7495 {
7496 if RT_CONSTEXPR_IF(fSimdRegValues)
7497 {
7498 Assert(a_enmOp == kIemNativeEmitMemOp_Fetch);
7499
7500 /* Sync the value on the stack with the host register assigned to the variable. */
7501 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
7502 }
7503 else
7504 {
7505 Assert(idxRegValueFetch == pVarValue->idxReg);
7506 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
7507 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
7508 }
7509 }
7510
7511#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7512 /* Restore variables and guest shadow registers to volatile registers. */
7513 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstGprsNotToSave);
7514 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7515#endif
7516
7517#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7518 if (pReNative->Core.offPc)
7519 {
7520 /*
7521 * Time to restore the program counter to its original value.
7522 */
7523 /* Allocate a temporary PC register. */
7524 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7525 kIemNativeGstRegUse_ForUpdate);
7526
7527 /* Restore the original value. */
7528 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7529 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
7530
7531 /* Free and flush the PC register. */
7532 iemNativeRegFreeTmp(pReNative, idxPcReg);
7533 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7534 }
7535#endif
7536
7537#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7538 if (!TlbState.fSkip)
7539 {
7540 /* end of TlbMiss - Jump to the done label. */
7541 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7542 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7543
7544 /*
7545 * TlbLookup:
7546 */
7547 off = iemNativeEmitTlbLookup<true, a_cbMem, a_fAlignMaskAndCtl,
7548 a_enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ
7549 >(pReNative, off, &TlbState, iSegReg, idxLabelTlbLookup, idxLabelTlbMiss,
7550 idxRegMemResult, offDisp);
7551
7552 /*
7553 * Emit code to do the actual storing / fetching.
7554 */
7555 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7556# ifdef IEM_WITH_TLB_STATISTICS
7557 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7558 a_enmOp == kIemNativeEmitMemOp_Store
7559 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch)
7560 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore));
7561# endif
7562 switch (a_enmOp)
7563 {
7564 case kIemNativeEmitMemOp_Store:
7565 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
7566 {
7567 switch (a_cbMem)
7568 {
7569 case 1:
7570 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7571 break;
7572 case 2:
7573 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7574 break;
7575 case 4:
7576 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7577 break;
7578 case 8:
7579 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7580 break;
7581 case sizeof(RTUINT128U):
7582 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7583 break;
7584 case sizeof(RTUINT256U):
7585 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7586 break;
7587 default:
7588 AssertFailed();
7589 }
7590 }
7591 else
7592 {
7593 switch (a_cbMem)
7594 {
7595 case 1:
7596 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
7597 idxRegMemResult, TlbState.idxReg1);
7598 break;
7599 case 2:
7600 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7601 idxRegMemResult, TlbState.idxReg1);
7602 break;
7603 case 4:
7604 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7605 idxRegMemResult, TlbState.idxReg1);
7606 break;
7607 case 8:
7608 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
7609 idxRegMemResult, TlbState.idxReg1);
7610 break;
7611 default:
7612 AssertFailed();
7613 }
7614 }
7615 break;
7616
7617 case kIemNativeEmitMemOp_Fetch:
7618 case kIemNativeEmitMemOp_Fetch_Zx_U16:
7619 case kIemNativeEmitMemOp_Fetch_Zx_U32:
7620 case kIemNativeEmitMemOp_Fetch_Zx_U64:
7621 switch (a_cbMem)
7622 {
7623 case 1:
7624 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7625 break;
7626 case 2:
7627 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7628 break;
7629 case 4:
7630 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7631 break;
7632 case 8:
7633 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7634 break;
7635 case sizeof(RTUINT128U):
7636 /*
7637 * No need to sync back the register with the stack, this is done by the generic variable handling
7638 * code if there is a register assigned to a variable and the stack must be accessed.
7639 */
7640 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7641 break;
7642 case sizeof(RTUINT256U):
7643 /*
7644 * No need to sync back the register with the stack, this is done by the generic variable handling
7645 * code if there is a register assigned to a variable and the stack must be accessed.
7646 */
7647 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7648 break;
7649 default:
7650 AssertFailed();
7651 }
7652 break;
7653
7654 case kIemNativeEmitMemOp_Fetch_Sx_U16:
7655 Assert(a_cbMem == 1);
7656 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7657 break;
7658
7659 case kIemNativeEmitMemOp_Fetch_Sx_U32:
7660 Assert(a_cbMem == 1 || a_cbMem == 2);
7661 if (a_cbMem == 1)
7662 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7663 else
7664 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7665 break;
7666
7667 case kIemNativeEmitMemOp_Fetch_Sx_U64:
7668 switch (a_cbMem)
7669 {
7670 case 1:
7671 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7672 break;
7673 case 2:
7674 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7675 break;
7676 case 4:
7677 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7678 break;
7679 default:
7680 AssertFailed();
7681 }
7682 break;
7683
7684 default:
7685 AssertFailed();
7686 }
7687
7688 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7689
7690 /*
7691 * TlbDone:
7692 */
7693 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7694
7695 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7696
7697# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7698 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7699 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7700# endif
7701 }
7702#else
7703 RT_NOREF(idxLabelTlbMiss);
7704#endif
7705
7706 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
7707 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7708 return off;
7709}
7710
7711
7712
/*********************************************************************************************************************************
*   Memory fetches (IEM_MC_FETCH_MEM_XXX).                                                                                       *
*********************************************************************************************************************************/
7716
7717/* 8-bit segmented:
 *
 * Every macro in this group expands to one statement that reassigns `off` from
 * iemNativeEmitMemFetchStoreDataCommon, so `off`, `pReNative` and `pCallEntry`
 * must be in scope wherever the macro is expanded.
 *
 * Template arguments: access size in bytes (sizeof(uint8_t)), the
 * fAlignMaskAndCtl value (0 for byte accesses) and the kIemNativeEmitMemOp_XXX
 * operation.
 * Call arguments: the destination (a_uXxDst), the segment register (a_iSeg),
 * the address (a_GCPtrMem), the helper function cast to uintptr_t, and
 * pCallEntry->idxInstr.
 *
 * The _ZX_ variants all pass the plain iemNativeHlpMemFetchDataU8 helper and
 * differ only in the Fetch_Zx_Uxx operation; the _SX_ variants pair the
 * Fetch_Sx_Uxx operation with the matching iemNativeHlpMemFetchDataU8_Sx_Uxx
 * helper.
 */
7718#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
7719 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch>( \
7720 pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7721
7722#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7723 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16>( \
7724 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7725
7726#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7727 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32>( \
7728 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7729
7730#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7731 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64>( \
7732 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7733
7734#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7735 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16>(\
7736 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7737
7738#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7739 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7740 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7741
7742#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7743 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7744 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7745
7746/* 16-bit segmented:
 *
 * Each macro reassigns `off` from
 * iemNativeEmitMemFetchStoreDataCommon<size, align mask, operation> and relies
 * on `pReNative`, `off` and `pCallEntry` being in scope at the expansion site.
 * The size is sizeof(uint16_t) and the second template argument is
 * sizeof(uint16_t) - 1.
 *
 * The _DISP variant forwards a_offDisp as an additional trailing call
 * argument; the other macros do not pass that argument.
 * The _ZX_ variants reuse iemNativeHlpMemFetchDataU16 and differ only in the
 * Fetch_Zx_Uxx operation; the _SX_ variants pair Fetch_Sx_Uxx with the matching
 * iemNativeHlpMemFetchDataU16_Sx_Uxx helper.
 */
7747#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7748 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7749 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7750
7751#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7752 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7753 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7754
7755#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7756 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32>(\
7757 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7758
7759#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7760 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64>(\
7761 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7762
7763#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7764 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7765 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7766
7767#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7768 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7769 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7770
7771
7772/* 32-bit segmented:
 *
 * Besides the uint32_t fetches, this group also holds the signed (I16, I32,
 * I64) and RTFLOAT32U fetch macros.  All of them reassign `off` from
 * iemNativeEmitMemFetchStoreDataCommon<size, size - 1, operation> and rely on
 * `pReNative`, `off` and `pCallEntry` being in scope at the expansion site.
 *
 *  - U32_ZX_U64 reuses iemNativeHlpMemFetchDataU32 with Fetch_Zx_U64;
 *    U32_SX_U64 pairs Fetch_Sx_U64 with iemNativeHlpMemFetchDataU32_Sx_U64.
 *  - The I16 variants use the Fetch_Sx_U32 operation together with the
 *    iemNativeHlpMemFetchDataU16_Sx_U32 helper.
 *  - I32, I64 and R32 use the plain Fetch operation with the unsigned helper
 *    of the same width (U32, U64 and U32 respectively).
 *  - The _DISP variants forward a_offDisp as an extra trailing call argument.
 *  - The AssertCompileSize line ties sizeof(RTFLOAT32U) to sizeof(uint32_t),
 *    matching the U32 helper passed by the R32 macro.
 */
7773#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7774 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7775 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7776
7777#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7778 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7779 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7780
7781#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7782 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64>(\
7783 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7784
7785#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7786 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7787 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7788
7789#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
7790 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7791 pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7792
7793#define IEM_MC_FETCH_MEM_I16_DISP(a_i16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7794 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7795 pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr, \
7796 a_offDisp)
7797
7798#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
7799 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7800 pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7801
7802#define IEM_MC_FETCH_MEM_I32_DISP(a_i32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7803 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7804 pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7805
7806#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
7807 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7808 pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7809
7810AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
7811#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
7812 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch>(\
7813 pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7814
7815
7816/* 64-bit segmented:
 *
 * U64 and R64 both reassign `off` from iemNativeEmitMemFetchStoreDataCommon
 * with the plain Fetch operation, sizeof() - 1 as the second template argument
 * and the iemNativeHlpMemFetchDataU64 helper; they differ only in the type
 * used for the sizeof() expressions.  The AssertCompileSize line ties
 * sizeof(RTFLOAT64U) to sizeof(uint64_t).
 * `pReNative`, `off` and `pCallEntry` must be in scope at the expansion site.
 */
7817#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7818 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7819 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7820
7821AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
7822#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
7823 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch>(\
7824 pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7825
7826
7827/* 8-bit flat:
 *
 * Flat counterparts of the segmented 8-bit fetch macros.  They take no a_iSeg
 * parameter, pass UINT8_MAX in the segment register position, add `true` as a
 * fourth template argument and use the iemNativeHlpMemFlatFetchDataU8* helpers.
 *
 * As with the segmented forms, each macro reassigns `off` and relies on
 * `pReNative`, `off` and `pCallEntry` being in scope at the expansion site.
 * The _ZX_ variants reuse iemNativeHlpMemFlatFetchDataU8 and differ only in
 * the Fetch_Zx_Uxx operation; the _SX_ variants pair Fetch_Sx_Uxx with the
 * matching iemNativeHlpMemFlatFetchDataU8_Sx_Uxx helper.
 */
7828#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7829 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, true>(\
7830 pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7831
7832#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7833 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, true>(\
7834 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7835
7836#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7837 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, true>(\
7838 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7839
7840#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7841 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7842 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7843
7844#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7845 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, true>(\
7846 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7847
7848#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7849 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7850 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7851
7852#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7853 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7854 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7855
7856
7857/* 16-bit flat:
 *
 * Flat counterparts of the segmented 16-bit fetch macros: no a_iSeg parameter,
 * UINT8_MAX in the segment register position, `true` as the fourth template
 * argument and the iemNativeHlpMemFlatFetchDataU16* helpers.
 *
 * Each macro reassigns `off` and relies on `pReNative`, `off` and `pCallEntry`
 * being in scope at the expansion site.  The _DISP variant forwards a_offDisp
 * as an extra trailing call argument.  The _ZX_ variants reuse
 * iemNativeHlpMemFlatFetchDataU16; the _SX_ variants use the matching
 * iemNativeHlpMemFlatFetchDataU16_Sx_Uxx helper.
 */
7858#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7859 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7860 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7861
7862#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7863 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7864 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7865
7866#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7867 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, true>(\
7868 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7869
7870#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7871 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7872 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7873
7874#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7875 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7876 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7877
7878#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7879 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7880 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7881
7882/* 32-bit flat:
 *
 * Flat counterparts of the segmented 32-bit group, again including the signed
 * (I16, I32, I64) and RTFLOAT32U fetches.  The macros take no a_iSeg parameter,
 * pass UINT8_MAX in the segment register position, add `true` as the fourth
 * template argument and use the iemNativeHlpMemFlatFetchData* helpers.
 *
 *  - U32_ZX_U64 reuses iemNativeHlpMemFlatFetchDataU32 with Fetch_Zx_U64;
 *    U32_SX_U64 pairs Fetch_Sx_U64 with iemNativeHlpMemFlatFetchDataU32_Sx_U64.
 *  - The I16 variants use Fetch_Sx_U32 with
 *    iemNativeHlpMemFlatFetchDataU16_Sx_U32.
 *  - I32, I64 and R32 use the plain Fetch operation with the unsigned flat
 *    helper of the same width (U32, U64 and U32 respectively).
 *  - The _DISP variants forward a_offDisp as an extra trailing call argument.
 *
 * `pReNative`, `off` and `pCallEntry` must be in scope at the expansion site.
 */
7883#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7884 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7885 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7886
7887#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7888 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7889 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7890
7891#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7892 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7893 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7894
7895#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7896 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7897 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7898
7899#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7900 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7901 pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7902
7903#define IEM_MC_FETCH_MEM_FLAT_I16_DISP(a_i16Dst, a_GCPtrMem, a_offDisp) \
7904 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7905 pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7906
7907#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7908 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7909 pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7910
7911#define IEM_MC_FETCH_MEM_FLAT_I32_DISP(a_i32Dst, a_GCPtrMem, a_offDisp) \
7912 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7913 pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7914
7915#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7916 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7917 pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7918
7919#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7920 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7921 pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7922
7923
7924/* 64-bit flat:
 *
 * Flat U64 and R64 fetches.  Both pass UINT8_MAX in the segment register
 * position, `true` as the fourth template argument, the plain Fetch operation
 * and the iemNativeHlpMemFlatFetchDataU64 helper; they differ only in the type
 * used for the sizeof() expressions.
 * `pReNative`, `off` and `pCallEntry` must be in scope at the expansion site.
 */
7925#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7926 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7927 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7928
7929#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7930 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7931 pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7932
7933
7934/* 128-bit segmented:
 *
 * Each macro reassigns `off` from iemNativeEmitMemFetchStoreDataCommon with
 * the plain Fetch operation and relies on `pReNative`, `off` and `pCallEntry`
 * being in scope at the expansion site.
 *
 *  - U128 and U128_NO_AC pass sizeof(RTUINT128U) - 1 as the second template
 *    argument and differ only in the helper (iemNativeHlpMemFetchDataU128 vs
 *    iemNativeHlpMemFetchDataU128NoAc).
 *  - The _ALIGN_SSE variants OR IEM_MEMMAP_F_ALIGN_GP and
 *    IEM_MEMMAP_F_ALIGN_SSE into the second template argument and pass
 *    iemNativeHlpMemFetchDataU128AlignedSse.
 *  - The XMM variants size the access by X86XMMREG but pass the U128 helpers;
 *    the AssertCompileSize line ties sizeof(X86XMMREG) to sizeof(RTUINT128U).
 */
7935#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7936 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch>(\
7937 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7938
7939#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7940 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
7941 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7942 kIemNativeEmitMemOp_Fetch>(\
7943 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7944
7945AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7946#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7947 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), \
7948 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7949 kIemNativeEmitMemOp_Fetch>(\
7950 pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7951
7952#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7953 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch>(\
7954 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7955
7956#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7957 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch>(\
7958 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7959
7960
7961/* 128-bit flat:
 *
 * Flat counterparts of the segmented 128-bit fetch macros: no a_iSeg
 * parameter, UINT8_MAX in the segment register position, `true` as the fourth
 * template argument and the iemNativeHlpMemFlatFetchDataU128* helpers.
 *
 *  - U128 and U128_NO_AC differ only in the helper (…U128 vs …U128NoAc).
 *  - The _ALIGN_SSE variants OR IEM_MEMMAP_F_ALIGN_GP and
 *    IEM_MEMMAP_F_ALIGN_SSE into the second template argument and pass
 *    iemNativeHlpMemFlatFetchDataU128AlignedSse.
 *  - The XMM variants size the access by X86XMMREG but pass the U128 helpers.
 *
 * `pReNative`, `off` and `pCallEntry` must be in scope at the expansion site.
 */
7962#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7963 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7964 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7965
7966#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7967 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
7968 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7969 kIemNativeEmitMemOp_Fetch, true>(\
7970 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7971
7972#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7973 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), \
7974 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7975 kIemNativeEmitMemOp_Fetch, true>(\
7976 pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7977
7978#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7979 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7980 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7981
7982#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
7983 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7984 pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7985
7986/* 256-bit segmented:
 *
 * Each macro reassigns `off` from iemNativeEmitMemFetchStoreDataCommon with
 * the plain Fetch operation and relies on `pReNative`, `off` and `pCallEntry`
 * being in scope at the expansion site.
 *
 *  - U256 and U256_NO_AC expand to the same call: both pass
 *    sizeof(RTUINT256U) - 1 and the iemNativeHlpMemFetchDataU256NoAc helper.
 *  - U256_ALIGN_AVX ORs only IEM_MEMMAP_F_ALIGN_GP into the second template
 *    argument and passes iemNativeHlpMemFetchDataU256AlignedAvx.
 *  - YMM_NO_AC sizes the access by X86YMMREG and passes the U256NoAc helper.
 *    NOTE(review): no size check tying X86YMMREG to RTUINT256U is visible in
 *    this part of the file - confirm one exists elsewhere.
 */
7987#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7988 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch>(\
7989 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7990
7991#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7992 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch>(\
7993 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7994
7995#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7996 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
7997 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
7998 kIemNativeEmitMemOp_Fetch>(\
7999 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8000
8001#define IEM_MC_FETCH_MEM_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
8002 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch>(\
8003 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8004
8005
8006/* 256-bit flat:
 *
 * Flat counterparts of the segmented 256-bit fetch macros: no a_iSeg
 * parameter, UINT8_MAX in the segment register position, `true` as the fourth
 * template argument and the iemNativeHlpMemFlatFetchDataU256* helpers.
 *
 *  - FLAT_U256 and FLAT_U256_NO_AC expand to the same call (both pass
 *    iemNativeHlpMemFlatFetchDataU256NoAc).
 *  - FLAT_U256_ALIGN_AVX ORs only IEM_MEMMAP_F_ALIGN_GP into the second
 *    template argument and passes iemNativeHlpMemFlatFetchDataU256AlignedAvx.
 *  - FLAT_YMM_NO_AC sizes the access by X86YMMREG and passes the U256NoAc
 *    helper.
 *
 * `pReNative`, `off` and `pCallEntry` must be in scope at the expansion site.
 */
8007#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
8008 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8009 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8010
8011#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
8012 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8013 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8014
8015#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
8016 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8017 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
8018 kIemNativeEmitMemOp_Fetch, true>(\
8019 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8020
8021#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
8022 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8023 pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8024
8025
8026
8027/*********************************************************************************************************************************
8028* Memory stores (IEM_MC_STORE_MEM_XXX). *
8029*********************************************************************************************************************************/
8030
/*
 * Segmented stores of a value variable (8/16/32/64-bit).
 *
 * Each macro reassigns `off` from iemNativeEmitMemFetchStoreDataCommon with
 * the kIemNativeEmitMemOp_Store operation, so `off`, `pReNative` and
 * `pCallEntry` must be in scope at the expansion site.  The value to store is
 * passed in the third call argument, followed by the segment register, the
 * address, the iemNativeHlpMemStoreDataUxx helper and pCallEntry->idxInstr.
 * The second template argument is 0 for the byte store and sizeof() - 1 for
 * the wider ones.
 */
8031#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
8032 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store>(\
8033 pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8034
8035#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
8036 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store>(\
8037 pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8038
8039#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
8040 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store>(\
8041 pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8042
8043#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
8044 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store>(\
8045 pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8046
8047
/*
 * Flat stores of a value variable (8/16/32/64-bit).
 *
 * Same shape as the segmented store macros, except that there is no a_iSeg
 * parameter: UINT8_MAX is passed in the segment register position, `true` is
 * added as the fourth template argument and the iemNativeHlpMemFlatStoreDataUxx
 * helpers are used.  `off`, `pReNative` and `pCallEntry` must be in scope at
 * the expansion site.
 */
8048#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
8049 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, true>(\
8050 pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8051
8052#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
8053 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8054 pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8055
8056#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
8057 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8058 pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8059
8060#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
8061 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8062 pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8063
8064
/*
 * Segmented stores of a constant (8/16/32/64-bit).
 *
 * Each macro reassigns `off` from iemNativeEmitMemStoreConstDataCommon<size>,
 * which is defined after these macros in this file.  The constant is passed in
 * the third call argument, followed by the segment register, the address, the
 * helper and pCallEntry->idxInstr.  The helpers are the same
 * iemNativeHlpMemStoreDataUxx functions the non-const store macros pass.
 * `off`, `pReNative` and `pCallEntry` must be in scope at the expansion site.
 */
8065#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
8066 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint8_t)>(\
8067 pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8068
8069#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
8070 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint16_t)>(\
8071 pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8072
8073#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
8074 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint32_t)>(\
8075 pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8076
8077#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
8078 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint64_t)>(\
8079 pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8080
8081
/*
 * Flat stores of a constant (8/16/32/64-bit).
 *
 * Each macro reassigns `off` from
 * iemNativeEmitMemStoreConstDataCommon<size, true>, passing UINT8_MAX in the
 * segment register position and the iemNativeHlpMemFlatStoreDataUxx helper.
 * `off`, `pReNative` and `pCallEntry` must be in scope at the expansion site.
 */
8082#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
8083 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint8_t), true>(\
8084 pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8085
8086#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
8087 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint16_t), true>(\
8088 pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8089
8090#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
8091 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint32_t), true>(\
8092 pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8093
8094#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
8095 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint64_t), true>(\
8096 pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8097
8098/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
8099 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
8100template<uint8_t const a_cbMem, bool a_fFlat = false>
8101DECL_INLINE_THROW(uint32_t)
8102iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
8103 uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr)
8104{
8105 /*
8106 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
8107 * to do the grunt work.
8108 */
8109 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, a_cbMem, uValueConst);
8110 off = iemNativeEmitMemFetchStoreDataCommon<a_cbMem, a_cbMem - 1,
8111 kIemNativeEmitMemOp_Store,
8112 a_fFlat>(pReNative, off, idxVarConstValue, iSegReg,
8113 idxVarGCPtrMem, pfnFunction, idxInstr);
8114 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
8115 return off;
8116}
8117
8118
/*
 * Segmented 128-bit and 256-bit stores.
 *
 * Each macro reassigns `off` from iemNativeEmitMemFetchStoreDataCommon with
 * the kIemNativeEmitMemOp_Store operation; `off`, `pReNative` and `pCallEntry`
 * must be in scope at the expansion site.
 *
 *  - U128_ALIGN_SSE ORs IEM_MEMMAP_F_ALIGN_GP and IEM_MEMMAP_F_ALIGN_SSE into
 *    the second template argument.
 *  - U256_ALIGN_AVX ORs only IEM_MEMMAP_F_ALIGN_GP into it.
 *  - The _NO_AC variants pass the plain sizeof() - 1 value.
 *
 * Each variant passes the iemNativeHlpMemStoreData* helper matching its name.
 */
8119#define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
8120 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
8121 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8122 kIemNativeEmitMemOp_Store>(\
8123 pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
8124
8125#define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
8126 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store>(\
8127 pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
8128
8129#define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
8130 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store>(\
8131 pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
8132
8133#define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
8134 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8135 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
8136 kIemNativeEmitMemOp_Store>(\
8137 pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8138
8139
/*
 * Flat 128-bit and 256-bit stores.
 *
 * Flat counterparts of the segmented wide stores: no a_iSeg parameter,
 * UINT8_MAX in the segment register position, `true` as the fourth template
 * argument and the iemNativeHlpMemFlatStoreData* helpers.  The alignment
 * arguments follow the same pattern: ALIGN_SSE ORs in IEM_MEMMAP_F_ALIGN_GP and
 * IEM_MEMMAP_F_ALIGN_SSE, ALIGN_AVX only IEM_MEMMAP_F_ALIGN_GP, and the _NO_AC
 * variants pass the plain sizeof() - 1 value.
 * `off`, `pReNative` and `pCallEntry` must be in scope at the expansion site.
 */
8140#define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
8141 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
8142 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8143 kIemNativeEmitMemOp_Store, true>(\
8144 pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, \
8145 pCallEntry->idxInstr)
8146
8147#define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
8148 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, true>(\
8149 pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
8150
8151#define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
8152 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, true>(\
8153 pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
8154
8155#define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
8156 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8157 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8158 true>(\
8159 pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8160
8161
8162
8163/*********************************************************************************************************************************
8164* Stack Accesses. *
8165*********************************************************************************************************************************/
8166#define IEM_MC_PUSH_U16(a_u16Value) \
8167 off = iemNativeEmitStackPush<16, 0, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
8168#define IEM_MC_PUSH_U32(a_u32Value) \
8169 off = iemNativeEmitStackPush<32, 0, 0>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
8170#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
8171 off = iemNativeEmitStackPush<32, 0, 1>(pReNative, off, a_uSegVal, (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
8172#define IEM_MC_PUSH_U64(a_u64Value) \
8173 off = iemNativeEmitStackPush<64, 0, 0>(pReNative, off, a_u64Value, (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
8174
8175#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
8176 off = iemNativeEmitStackPush<16, 32, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8177#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
8178 off = iemNativeEmitStackPush<32, 32, 0>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
8179#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
8180 off = iemNativeEmitStackPush<32, 32, 1>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
8181
/* Flat 64-bit pushes: iemNativeEmitStackPush with a_cBitsFlat = 64.  Only 16-bit
   and 64-bit value widths are provided here; the 16-bit variant uses the same
   flat U16 helper as the FLAT32 one. */
8182#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
8183 off = iemNativeEmitStackPush<16, 64, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8184#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
8185 off = iemNativeEmitStackPush<64, 64, 0>(pReNative, off, a_u64Value, (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
8186
8187
8188/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64
 *
 * Emits the native code for pushing the value of a variable onto the guest
 * stack.  The emitted code is laid out in this order:
 *   -# (strict builds) an fExec sanity check, then a flush of pending writes.
 *   -# The RSP / effective stack pointer update.  Flat variants subtract
 *      cbMem from RSP directly; non-flat variants test X86DESCATTR_D in the
 *      SS attributes, handle the stack pointer width matching the current
 *      CPU mode inline and branch to an out-of-line block for the other one.
 *   -# A jump to TlbLookup (or straight to TlbMiss when the lookup is skipped).
 *   -# The out-of-line stack pointer update, jumping back to SpUpdateEnd.
 *   -# TlbMiss: call to @a pfnFunction with pVCpu (ARG0), the effective stack
 *      pointer (ARG1) and the value (ARG2).
 *   -# TlbLookup and the inline store (only with IEMNATIVE_WITH_TLB_LOOKUP and
 *      when the lookup isn't skipped).
 *   -# TlbDone: RSP is stored back unless IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
 *      is defined, and the temporary registers are released.
 *
 * The value variable is freed before returning.
 *
 * @returns New code buffer offset.
 * @tparam  a_cBitsVar      Width of the pushed value in bits: 16, 32 or 64.
 * @tparam  a_cBitsFlat     0 for SS-relative (non-flat) access, otherwise the
 *                          flat mode width: 32 or 64.
 * @tparam  a_fIsSegReg     Set when pushing a segment register value (only
 *                          valid for value widths below 64 bits).
 * @param   pReNative       The native recompiler state.
 * @param   off             The current code buffer offset.
 * @param   idxVarValue     The variable holding the value to push.
 * @param   pfnFunction     The helper to call on the TLB miss path; must match
 *                          the template arguments (asserted in strict builds).
 * @param   idxInstr        The instruction number, stored into
 *                          iem.s.idxTbCurInstr before the helper call when
 *                          IEMNATIVE_WITH_INSTRUCTION_COUNTING is defined.
 */
8189template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat, bool a_fIsSegReg = false>
8190DECL_INLINE_THROW(uint32_t)
8191iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uintptr_t pfnFunction, uint8_t idxInstr)
8192{
8193 /*
8194 * Assert sanity.
8195 */
8196 AssertCompile(a_cBitsVar == 16 || a_cBitsVar == 32 || a_cBitsVar == 64);
8197 AssertCompile(a_cBitsFlat == 0 || a_cBitsFlat == 32 || a_cBitsFlat == 64);
8198 AssertCompile(!a_fIsSegReg || a_cBitsVar < 64);
8199 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
8200 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
8201#ifdef VBOX_STRICT
8202 uint32_t const cTmplArgs = RT_MAKE_U32_FROM_U8(a_cBitsVar, a_cBitsFlat, a_fIsSegReg, 0);
8203 if (a_cBitsFlat != 0)
8204 {
8205 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8206 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8207 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8208 Assert( pfnFunction
8209 == ( cTmplArgs == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8210 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
8211 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
8212 : cTmplArgs == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8213 : cTmplArgs == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
8214 : UINT64_C(0xc000b000a0009000) ));
8215 }
8216 else
8217 Assert( pfnFunction
8218 == ( cTmplArgs == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
8219 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
8220 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
8221 : cTmplArgs == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
8222 : UINT64_C(0xc000b000a0009000) ));
8223#endif
8224
8225#ifdef VBOX_STRICT
8226 /*
8227 * Check that the fExec flags we've got make sense.
8228 */
8229 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8230#endif
8231
8232 /*
8233 * To keep things simple we have to commit any pending writes first as we
8234 * may end up making calls.
8235 */
8236 /** @todo we could postpone this till we make the call and reload the
8237 * registers after returning from the call. Not sure if that's sensible or
8238 * not, though. */
8239 off = iemNativeRegFlushPendingWrites(pReNative, off);
8240
8241 /*
8242 * First we calculate the new RSP and the effective stack pointer value.
8243 * For 64-bit mode and flat 32-bit these two are the same.
8244 * (Code structure is very similar to that of POP)
8245 */
8246 RT_CONSTEXPR
8247 uint8_t const cbMem = a_cBitsVar / 8;
8248 bool const fIsIntelSeg = a_fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
8249 uint8_t const cbMemAccess = !a_fIsSegReg || !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
8250 ? cbMem : sizeof(uint16_t);
8251 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8252 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8253 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8254 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8255 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
8256 {
8257 Assert(idxRegEffSp == idxRegRsp);
8258 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
8259 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8260 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8261 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
8262 else
8263 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
8264 }
8265 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8266 {
8267 Assert(idxRegEffSp != idxRegRsp);
8268 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8269 kIemNativeGstRegUse_ReadOnly);
8270#ifdef RT_ARCH_AMD64
8271 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8272#else
8273 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8274#endif
8275 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8276 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8277 offFixupJumpToUseOtherBitSp = off;
8278 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8279 {
8280 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8281 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8282 }
8283 else
8284 {
8285 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8286 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8287 }
8288 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8289 }
8290 /* SpUpdateEnd: */
8291 uint32_t const offLabelSpUpdateEnd = off;
8292
8293 /*
8294 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8295 * we're skipping lookup).
8296 */
8297 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8298 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
8299 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8300 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8301 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8302 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8303 : UINT32_MAX;
8304 uint8_t const idxRegValue = !TlbState.fSkip
8305 && pVarValue->enmKind != kIemNativeVarKind_Immediate
8306 ? iemNativeVarRegisterAcquireInitedWithPref(pReNative, idxVarValue, &off,
8307 IEMNATIVE_CALL_ARG2_GREG)
8308 : UINT8_MAX;
8309 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
8310
8311
8312 if (!TlbState.fSkip)
8313 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8314 else
8315 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8316
8317 /*
8318 * Use16BitSp / Use32BitSp: the stack pointer width not handled inline above.
8319 */
8320 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
8321 {
8322#ifdef RT_ARCH_AMD64
8323 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8324#else
8325 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8326#endif
8327 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8328 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8329 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8330 else
8331 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8332 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8333 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8334 }
8335
8336 /*
8337 * TlbMiss:
8338 *
8339 * Call helper to do the pushing.
8340 */
8341 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8342
8343#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8344 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8345#else
8346 RT_NOREF(idxInstr);
8347#endif
8348
8349 /* Save variables in volatile registers. */
8350 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8351 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8352 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
8353 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
8354 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8355
8356 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
8357 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
8358 {
8359 /* Swap them using ARG0 as temp register: */
8360 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
8361 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
8362 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
8363 }
8364 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
8365 {
8366 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
8367 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
8368 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8369
8370 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
8371 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8372 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8373 }
8374 else
8375 {
8376 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
8377 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8378
8379 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
8380 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
8381 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
8382 }
8383
8384#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8385 /* Do delayed EFLAGS calculations. */
8386 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
8387 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
8388#endif
8389
8390 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8391 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8392
8393 /* Done setting up parameters, make the call. */
8394 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
8395
8396 /* Restore variables and guest shadow registers to volatile registers. */
8397 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8398 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8399
8400#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8401 if (!TlbState.fSkip)
8402 {
8403 /* end of TlbMiss - Jump to the done label. */
8404 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8405 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8406
8407 /*
8408 * TlbLookup:
8409 */
8410 if (!a_fIsSegReg || cbMemAccess == cbMem)
8411 {
8412 Assert(cbMemAccess == cbMem);
8413 off = iemNativeEmitTlbLookup<true, cbMem, cbMem - 1, IEM_ACCESS_TYPE_WRITE>(pReNative, off, &TlbState,
8414 iSegReg, idxLabelTlbLookup,
8415 idxLabelTlbMiss, idxRegMemResult);
8416 }
8417 else
8418 {
8419 Assert(cbMemAccess == sizeof(uint16_t));
8420 off = iemNativeEmitTlbLookup<true, sizeof(uint16_t), sizeof(uint16_t) - 1,
8421 IEM_ACCESS_TYPE_WRITE>(pReNative, off, &TlbState, iSegReg,
8422 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8423 }
8424
8425 /*
8426 * Emit code to do the actual storing.
8427 */
8428 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
8429# ifdef IEM_WITH_TLB_STATISTICS
8430 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8431 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8432# endif
8433 if (idxRegValue != UINT8_MAX)
8434 {
8435 switch (cbMemAccess)
8436 {
8437 case 2:
8438 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8439 break;
8440 case 4:
8441 if (!a_fIsSegReg || !fIsIntelSeg)
8442 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8443 else
8444 {
8445 /* intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
8446 PUSH FS in real mode, so we have to try emulate that here.
8447 We borrow the now unused idxReg1 from the TLB lookup code here. */
8448 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
8449 kIemNativeGstReg_EFlags);
8450 if (idxRegEfl != UINT8_MAX)
8451 {
8452# ifdef RT_ARCH_AMD64
8453 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
8454 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8455 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8456# else
8457 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
8458 off, TlbState.idxReg1, idxRegEfl,
8459 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8460# endif
8461 iemNativeRegFreeTmp(pReNative, idxRegEfl);
8462 }
8463 else
8464 {
8465 off = iemNativeEmitLoadGprWithGstRegT<kIemNativeGstReg_EFlags>(pReNative, off, TlbState.idxReg1);
8466 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8467 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8468 }
8469 /* ASSUMES the upper half of idxRegValue is ZERO. */
8470 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
8471 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
8472 }
8473 break;
8474 case 8:
8475 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8476 break;
8477 default:
8478 AssertFailed();
8479 }
8480 }
8481 else
8482 {
8483 switch (cbMemAccess)
8484 {
8485 case 2:
8486 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
8487 idxRegMemResult, TlbState.idxReg1);
8488 break;
8489 case 4:
8490 Assert(!a_fIsSegReg);
8491 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
8492 idxRegMemResult, TlbState.idxReg1);
8493 break;
8494 case 8:
8495 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
8496 break;
8497 default:
8498 AssertFailed();
8499 }
8500 }
8501
8502 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8503 TlbState.freeRegsAndReleaseVars(pReNative);
8504
8505 /*
8506 * TlbDone:
8507 *
8508 * Commit the new RSP value.
8509 */
8510 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8511 }
8512#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8513
8514#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8515 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Rsp>(pReNative, off, idxRegRsp);
8516#endif
8517 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8518 if (idxRegEffSp != idxRegRsp)
8519 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8520
8521 /* The value variable is implicitly flushed. */
8522 if (idxRegValue != UINT8_MAX)
8523 iemNativeVarRegisterRelease(pReNative, idxVarValue);
8524 iemNativeVarFreeLocal(pReNative, idxVarValue);
8525
8526 return off;
8527}
8528
8529
8530
/* Non-flat pops into a general register: iemNativeEmitStackPopGReg<a_cBitsVar, 0>
   paired with the matching iemNativeHlpStackFetchXxx helper.  The expansion
   assigns to 'off' and needs pReNative and pCallEntry in scope. */
8531#define IEM_MC_POP_GREG_U16(a_iGReg) \
8532 off = iemNativeEmitStackPopGReg<16, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
8533#define IEM_MC_POP_GREG_U32(a_iGReg) \
8534 off = iemNativeEmitStackPopGReg<32, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
8535#define IEM_MC_POP_GREG_U64(a_iGReg) \
8536 off = iemNativeEmitStackPopGReg<64, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
8537
/* Flat 32-bit pops: iemNativeEmitStackPopGReg with a_cBitsFlat = 32 and the
   iemNativeHlpStackFlatFetchXxx helpers. */
8538#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
8539 off = iemNativeEmitStackPopGReg<16, 32>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8540#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
8541 off = iemNativeEmitStackPopGReg<32, 32>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
8542
/* Flat 64-bit pops: iemNativeEmitStackPopGReg with a_cBitsFlat = 64.  The 16-bit
   variant uses the same flat U16 fetch helper as the FLAT32 one. */
8543#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
8544 off = iemNativeEmitStackPopGReg<16, 64>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8545#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
8546 off = iemNativeEmitStackPopGReg<64, 64>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
8547
8548
/**
 * Emits the 16-bit stack pointer variant of the POP stack pointer update:
 * the effective stack pointer register receives the current SP (bits 15:0 of
 * RSP) and SP is then advanced by @a cbMem, leaving RSP bits 63:16 untouched.
 *
 * No buffer space check is done here; the caller supplies @a pCodeBuf.
 *
 * @returns New code buffer offset.
 * @param   pCodeBuf    The code buffer to emit into.
 * @param   off         The current code buffer offset.
 * @param   idxRegRsp   Host register holding the guest RSP (updated).
 * @param   idxRegEffSp Host register receiving the effective stack pointer.
 * @param   cbMem       Number of bytes being popped.
 * @param   idxRegTmp   Scratch host register; only used by the non-AMD64 code
 *                      path (unused on AMD64).
 */
8549DECL_FORCE_INLINE_THROW(uint32_t)
8550iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
8551 uint8_t idxRegTmp)
8552{
8553 /* Use16BitSp: */
8554#ifdef RT_ARCH_AMD64
8555 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8556 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
8557 RT_NOREF(idxRegTmp);
8558#else
8559 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
8560 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
8561 /* add tmp, regrsp, #cbMem */
8562 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
8563 /* and tmp, tmp, #0xffff */
8564 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
8565 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
8566 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
8567 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
8568#endif
8569 return off;
8570}
8571
8572
/**
 * Emits the 32-bit stack pointer variant of the POP stack pointer update:
 * a 32-bit register copy of RSP into the effective stack pointer register,
 * followed by a 32-bit add of @a cbMem to the RSP register.
 *
 * No buffer space check is done here; the caller supplies @a pCodeBuf.
 *
 * @returns New code buffer offset.
 * @param   pCodeBuf    The code buffer to emit into.
 * @param   off         The current code buffer offset.
 * @param   idxRegRsp   Host register holding the guest RSP (updated).
 * @param   idxRegEffSp Host register receiving the effective stack pointer.
 * @param   cbMem       Number of bytes being popped.
 */
8573DECL_FORCE_INLINE(uint32_t)
8574iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
8575{
8576 /* Use32BitSp: */
8577 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8578 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
8579 return off;
8580}
8581
8582
8583/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64
 *
 * Emits the native code for popping a value off the guest stack into a
 * general purpose register.  The emitted code is laid out in this order:
 *   -# (strict builds) an fExec sanity check, then a flush of pending writes.
 *   -# Effective stack pointer calculation.  Non-flat variants also update
 *      RSP here, testing X86DESCATTR_D in the SS attributes and branching to
 *      an out-of-line block for the stack pointer width that does not match
 *      the current CPU mode; flat variants use RSP as-is and adjust it last.
 *   -# A jump to TlbLookup (or straight to TlbMiss when the lookup is skipped).
 *   -# The out-of-line stack pointer update, jumping back to SpUpdateEnd.
 *   -# TlbMiss: call to @a pfnFunction with pVCpu (ARG0) and the effective
 *      stack pointer (ARG1); the return register is copied to the result
 *      register.
 *   -# TlbLookup and the inline load (only with IEMNATIVE_WITH_TLB_LOOKUP and
 *      when the lookup isn't skipped).
 *   -# TlbDone: the popped value is committed to the destination register
 *      (with special handling when that register is xSP) and RSP is stored
 *      back unless IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK is defined.
 *
 * @returns New code buffer offset.
 * @tparam  a_cBitsVar      Width of the popped value in bits: 16, 32 or 64.
 * @tparam  a_cBitsFlat     0 for SS-relative (non-flat) access, otherwise the
 *                          flat mode width: 32 or 64.
 * @param   pReNative       The native recompiler state.
 * @param   off             The current code buffer offset.
 * @param   idxGReg         The destination guest general register (0..15).
 * @param   pfnFunction     The helper to call on the TLB miss path; must match
 *                          the template arguments (asserted in strict builds).
 * @param   idxInstr        The instruction number, stored into
 *                          iem.s.idxTbCurInstr before the helper call when
 *                          IEMNATIVE_WITH_INSTRUCTION_COUNTING is defined.
 */
8584template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat>
8585DECL_INLINE_THROW(uint32_t)
8586iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg, uintptr_t pfnFunction, uint8_t idxInstr)
8587{
8588 /*
8589 * Assert sanity.
8590 */
8591 Assert(idxGReg < 16);
8592#ifdef VBOX_STRICT
8593 if (a_cBitsFlat != 0)
8594 {
8595 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8596 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8597 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8598 Assert( pfnFunction
8599 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 32) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8600 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 32) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
8601 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 64) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8602 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 64) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
8603 : UINT64_C(0xc000b000a0009000) ));
8604 }
8605 else
8606 Assert( pfnFunction
8607 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
8608 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
8609 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
8610 : UINT64_C(0xc000b000a0009000) ));
8611#endif
8612
8613#ifdef VBOX_STRICT
8614 /*
8615 * Check that the fExec flags we've got make sense.
8616 */
8617 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8618#endif
8619
8620 /*
8621 * To keep things simple we have to commit any pending writes first as we
8622 * may end up making calls.
8623 */
8624 off = iemNativeRegFlushPendingWrites(pReNative, off);
8625
8626 /*
8627 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
8628 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
8629 * directly as the effective stack pointer.
8630 * (Code structure is very similar to that of PUSH)
8631 */
8632 uint8_t const cbMem = a_cBitsVar / 8;
8633 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8634 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8635 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8636 /** @todo can do a better job picking the register here. For cbMem >= 4 this
8637 * will be the resulting register value. */
8638 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
8639
8640 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8641 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
8642 {
8643 Assert(idxRegEffSp == idxRegRsp);
8644 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
8645 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8646 }
8647 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8648 {
8649 Assert(idxRegEffSp != idxRegRsp);
8650 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8651 kIemNativeGstRegUse_ReadOnly);
8652#ifdef RT_ARCH_AMD64
8653 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8654#else
8655 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8656#endif
8657 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8658 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8659 offFixupJumpToUseOtherBitSp = off;
8660 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8661 {
8662/** @todo can skip idxRegRsp updating when popping ESP. */
8663 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8664 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8665 }
8666 else
8667 {
8668 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8669 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8670 }
8671 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8672 }
8673 /* SpUpdateEnd: */
8674 uint32_t const offLabelSpUpdateEnd = off;
8675
8676 /*
8677 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8678 * we're skipping lookup).
8679 */
8680 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8681 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
8682 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8683 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8684 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8685 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8686 : UINT32_MAX;
8687
8688 if (!TlbState.fSkip)
8689 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8690 else
8691 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8692
8693 /*
8694 * Use16BitSp / Use32BitSp: the stack pointer width not handled inline above.
8695 */
8696 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
8697 {
8698#ifdef RT_ARCH_AMD64
8699 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8700#else
8701 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8702#endif
8703 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8704 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8705 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8706 else
8707 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8708 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8709 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8710 }
8711
8712 /*
8713 * TlbMiss:
8714 *
8715 * Call helper to do the fetching (popping).
8716 */
8717 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8718
8719#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8720 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8721#else
8722 RT_NOREF(idxInstr);
8723#endif
8724
8725 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8726 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8727 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
8728 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8729
8730
8731 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
8732 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8733 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8734
8735#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8736 /* Do delayed EFLAGS calculations. */
8737 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
8738#endif
8739
8740 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8741 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8742
8743 /* Done setting up parameters, make the call. */
8744 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
8745
8746 /* Move the return register content to idxRegMemResult. */
8747 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8748 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8749
8750 /* Restore variables and guest shadow registers to volatile registers. */
8751 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8752 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8753
8754#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8755 if (!TlbState.fSkip)
8756 {
8757 /* end of TlbMiss - Jump to the done label. */
8758 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8759 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8760
8761 /*
8762 * TlbLookup:
8763 */
8764 off = iemNativeEmitTlbLookup<true, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ>(pReNative, off, &TlbState, iSegReg,
8765 idxLabelTlbLookup, idxLabelTlbMiss,
8766 idxRegMemResult);
8767
8768 /*
8769 * Emit code to load the value (from idxRegMemResult into idxRegMemResult).
8770 */
8771 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8772# ifdef IEM_WITH_TLB_STATISTICS
8773 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8774 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8775# endif
8776 switch (cbMem)
8777 {
8778 case 2:
8779 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8780 break;
8781 case 4:
8782 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8783 break;
8784 case 8:
8785 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8786 break;
8787 default:
8788 AssertFailed();
8789 }
8790
8791 TlbState.freeRegsAndReleaseVars(pReNative);
8792
8793 /*
8794 * TlbDone:
8795 *
8796 * Set the new RSP value (FLAT accesses need to calculate it first) and
8797 * commit the popped register value.
8798 */
8799 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8800 }
8801#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8802
8803 if (idxGReg != X86_GREG_xSP)
8804 {
8805 /* Set the register. */
8806 if (cbMem >= sizeof(uint32_t))
8807 {
8808#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8809 AssertMsg( pReNative->idxCurCall == 0
8810 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8811 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8812 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8813#endif
8814 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8815#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8816 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8817#endif
8818#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8819 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8820 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8821#endif
8822 }
8823 else
8824 {
8825 Assert(cbMem == sizeof(uint16_t));
8826 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8827 kIemNativeGstRegUse_ForUpdate);
8828 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8829#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8830 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8831#endif
8832 iemNativeRegFreeTmp(pReNative, idxRegDst);
8833 }
8834
8835 /* Complete RSP calculation for FLAT mode. */
8836 if (idxRegEffSp == idxRegRsp)
8837 {
8838 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8839 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8840 else
8841 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8842 }
8843 }
8844 else
8845 {
8846 /* We're popping RSP, ESP or SP. Only the last is a bit extra work, of course. */
8847 if (cbMem == sizeof(uint64_t))
8848 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8849 else if (cbMem == sizeof(uint32_t))
8850 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8851 else
8852 {
8853 if (idxRegEffSp == idxRegRsp)
8854 {
8855 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8856 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8857 else
8858 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8859 }
8860 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8861 }
8862 }
8863
8864#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8865 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Rsp>(pReNative, off, idxRegRsp);
8866#endif
8867
8868 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8869 if (idxRegEffSp != idxRegRsp)
8870 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8871 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8872
8873 return off;
8874}
8875
8876
8877
8878/*********************************************************************************************************************************
8879* Memory mapping (IEM_MC_MEM_MAP_XXX, IEM_MC_MEM_FLAT_MAP_XXX). *
8880*********************************************************************************************************************************/
8881
/* Byte sized memory mappings: iemNativeEmitMemMapCommon<sizeof(uint8_t), access
   type, 0 (no alignment mask/control bits)> paired with the matching
   iemNativeHlpMemMapDataU8Xxx helper.  The expansion assigns to 'off' and needs
   pReNative and pCallEntry in scope. */
8882#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8883 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/>(\
8884 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8885
8886#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8887 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/>(\
8888 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8889
8890#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8891 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/>(\
8892 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr)
8893
8894#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8895 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/>(\
8896 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8897
8898
/*
 * Word sized mappings via a segment register (a_iSeg).
 *
 * Each macro assigns 'off' the result of iemNativeEmitMemMapCommon<>,
 * instantiated with the access size, the IEM_ACCESS_DATA_XXX flags and an
 * alignment mask of sizeof(uint16_t) - 1.  The signed write-only variant
 * uses the same helper as the unsigned one.  The macros expect 'pReNative',
 * 'off' and 'pCallEntry' to be in scope where they are expanded.
 */
#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
              pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)

#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
              pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)

#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
              pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr)

#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
              pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)

#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(int16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
              pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr)
8918
8919
/*
 * Dword sized mappings via a segment register (a_iSeg).
 *
 * Each macro assigns 'off' the result of iemNativeEmitMemMapCommon<>,
 * instantiated with the access size, the IEM_ACCESS_DATA_XXX flags and an
 * alignment mask of sizeof(uint32_t) - 1.  The signed write-only variant
 * uses the same helper as the unsigned one.  The macros expect 'pReNative',
 * 'off' and 'pCallEntry' to be in scope where they are expanded.
 */
#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
              pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)

#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
              pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)

#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
              pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr)

#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
              pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)

#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(int32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
              pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr)
8939
8940
/*
 * Qword sized mappings via a segment register (a_iSeg).
 *
 * Each macro assigns 'off' the result of iemNativeEmitMemMapCommon<>,
 * instantiated with the access size, the IEM_ACCESS_DATA_XXX flags and an
 * alignment mask of sizeof(uint64_t) - 1.  The signed write-only variant
 * uses the same helper as the unsigned one.  The macros expect 'pReNative',
 * 'off' and 'pCallEntry' to be in scope where they are expanded.
 */
#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
              pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)

#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
              pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)

#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
              pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr)

#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
              pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)

#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(int64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
              pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr)
8959
8960
/*
 * Write-only 80-bit mappings via a segment register (a_iSeg).
 *
 * Both macros instantiate iemNativeEmitMemMapCommon<> with
 * sizeof(RTFLOAT80U) as the access size and an alignment mask of
 * sizeof(uint64_t) - 1; they differ only in the helper function handed to
 * the emitter.  The macros expect 'pReNative', 'off' and 'pCallEntry' to be
 * in scope where they are expanded.
 */
#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
              pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr)

#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, \
                                    sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/ /** @todo check BCD align */>(\
              pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr)
8969
8970
/*
 * 128-bit mappings via a segment register (a_iSeg).
 *
 * Each macro assigns 'off' the result of iemNativeEmitMemMapCommon<>,
 * instantiated with sizeof(RTUINT128U) as the access size, the
 * IEM_ACCESS_DATA_XXX flags and an alignment mask of sizeof(RTUINT128U) - 1.
 * The macros expect 'pReNative', 'off' and 'pCallEntry' to be in scope where
 * they are expanded.
 */
#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
              pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)

#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
              pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)

#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
              pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr)

#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
              pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
8986
8987
8988
/*
 * Byte sized mappings, flat addressing.
 *
 * Each macro assigns 'off' the result of iemNativeEmitMemMapCommon<>,
 * instantiated with a_fFlat = true and an alignment mask/control value of
 * zero, passing UINT8_MAX in place of a segment register index together with
 * the flat helper function matching the access type.  The macros expect
 * 'pReNative', 'off' and 'pCallEntry' to be in scope where they are expanded.
 */
#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)

#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)

#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr)

#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
9004
9005
/*
 * Word sized mappings, flat addressing.
 *
 * Each macro assigns 'off' the result of iemNativeEmitMemMapCommon<>,
 * instantiated with a_fFlat = true and an alignment mask of
 * sizeof(uint16_t) - 1, passing UINT8_MAX in place of a segment register
 * index.  The signed write-only variant uses the same helper as the unsigned
 * one.  The macros expect 'pReNative', 'off' and 'pCallEntry' to be in scope
 * where they are expanded.
 */
#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)

#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)

#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr)

#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)

#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(int16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr)
9025
9026
/*
 * Dword sized mappings, flat addressing.
 *
 * Each macro assigns 'off' the result of iemNativeEmitMemMapCommon<>,
 * instantiated with a_fFlat = true and an alignment mask of
 * sizeof(uint32_t) - 1, passing UINT8_MAX in place of a segment register
 * index.  The signed write-only variant uses the same helper as the unsigned
 * one.  The macros expect 'pReNative', 'off' and 'pCallEntry' to be in scope
 * where they are expanded.
 */
#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)

#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)

#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr)

#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)

#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(int32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr)
9046
9047
/*
 * Qword sized mappings, flat addressing.
 *
 * Each macro assigns 'off' the result of iemNativeEmitMemMapCommon<>,
 * instantiated with a_fFlat = true and an alignment mask of
 * sizeof(uint64_t) - 1, passing UINT8_MAX in place of a segment register
 * index.  The signed write-only variant uses the same helper as the unsigned
 * one.  The macros expect 'pReNative', 'off' and 'pCallEntry' to be in scope
 * where they are expanded.
 */
#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)

#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)

#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr)

#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)

#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(int64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr)
9067
9068
/*
 * Write-only 80-bit mappings, flat addressing.
 *
 * Both macros instantiate iemNativeEmitMemMapCommon<> with
 * sizeof(RTFLOAT80U) as the access size, an alignment mask of
 * sizeof(uint64_t) - 1 and a_fFlat = true, passing UINT8_MAX in place of a
 * segment register index; they differ only in the helper function handed to
 * the emitter.  The macros expect 'pReNative', 'off' and 'pCallEntry' to be
 * in scope where they are expanded.
 */
#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr)

#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, \
                                    sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/ /** @todo check BCD align */, true>(\
              pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr)
9077
9078
/*
 * 128-bit mappings, flat addressing.
 *
 * Each macro assigns 'off' the result of iemNativeEmitMemMapCommon<>,
 * instantiated with sizeof(RTUINT128U) as the access size, an alignment mask
 * of sizeof(RTUINT128U) - 1 and a_fFlat = true, passing UINT8_MAX in place of
 * a segment register index.  The macros expect 'pReNative', 'off' and
 * 'pCallEntry' to be in scope where they are expanded.
 */
#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)

#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)

#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr)

#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
    off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
              pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
9094
9095
9096template<uint8_t const a_cbMem, uint32_t const a_fAccess, uint32_t const a_fAlignMaskAndCtl, bool a_fFlat = false>
9097DECL_INLINE_THROW(uint32_t)
9098iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
9099 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr)
9100{
9101 /*
9102 * Assert sanity.
9103 */
9104 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
9105 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
9106 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
9107 && pVarMem->cbVar == sizeof(void *),
9108 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9109
9110 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9111 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9112 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
9113 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
9114 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9115
9116 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
9117 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
9118 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
9119 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
9120 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9121
9122 Assert(!a_fFlat ? iSegReg < 6 : iSegReg == UINT8_MAX);
9123
9124 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
9125
9126#ifdef VBOX_STRICT
9127# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
9128 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
9129 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
9130 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
9131 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
9132# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
9133 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
9134 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
9135 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
9136
9137 if RT_CONSTEXPR_IF(a_fFlat)
9138 {
9139 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
9140 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
9141 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
9142 switch (a_cbMem)
9143 {
9144 case 1:
9145 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU8));
9146 Assert(!a_fAlignMaskAndCtl);
9147 break;
9148 case 2:
9149 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU16));
9150 Assert(a_fAlignMaskAndCtl < 2);
9151 break;
9152 case 4:
9153 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU32));
9154 Assert(a_fAlignMaskAndCtl < 4);
9155 break;
9156 case 8:
9157 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU64));
9158 Assert(a_fAlignMaskAndCtl < 8);
9159 break;
9160 case 10:
9161 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
9162 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
9163 Assert((a_fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9164 Assert(a_fAlignMaskAndCtl < 8);
9165 break;
9166 case 16:
9167 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU128));
9168 Assert(a_fAlignMaskAndCtl < 16);
9169 break;
9170# if 0
9171 case 32:
9172 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemFlatMapDataU256));
9173 Assert(a_fAlignMaskAndCtl < 32);
9174 break;
9175 case 64:
9176 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemFlatMapDataU512));
9177 Assert(a_fAlignMaskAndCtl < 64);
9178 break;
9179# endif
9180 default: AssertFailed(); break;
9181 }
9182 }
9183 else
9184 {
9185 Assert(iSegReg < 6);
9186 switch (a_cbMem)
9187 {
9188 case 1:
9189 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU8));
9190 Assert(!a_fAlignMaskAndCtl);
9191 break;
9192 case 2:
9193 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU16));
9194 Assert(a_fAlignMaskAndCtl < 2);
9195 break;
9196 case 4:
9197 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU32));
9198 Assert(a_fAlignMaskAndCtl < 4);
9199 break;
9200 case 8:
9201 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU64));
9202 Assert(a_fAlignMaskAndCtl < 8);
9203 break;
9204 case 10:
9205 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
9206 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
9207 Assert((a_fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9208 Assert(a_fAlignMaskAndCtl < 8);
9209 break;
9210 case 16:
9211 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU128));
9212 Assert(a_fAlignMaskAndCtl < 16);
9213 break;
9214# if 0
9215 case 32:
9216 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemMapDataU256));
9217 Assert(a_fAlignMaskAndCtl < 32);
9218 break;
9219 case 64:
9220 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemMapDataU512));
9221 Assert(a_fAlignMaskAndCtl < 64);
9222 break;
9223# endif
9224 default: AssertFailed(); break;
9225 }
9226 }
9227# undef IEM_MAP_HLP_FN
9228# undef IEM_MAP_HLP_FN_NO_AT
9229#endif
9230
9231#ifdef VBOX_STRICT
9232 /*
9233 * Check that the fExec flags we've got make sense.
9234 */
9235 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
9236#endif
9237
9238 /*
9239 * To keep things simple we have to commit any pending writes first as we
9240 * may end up making calls.
9241 */
9242 off = iemNativeRegFlushPendingWrites(pReNative, off);
9243
9244#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9245 /*
9246 * Move/spill/flush stuff out of call-volatile registers.
9247 * This is the easy way out. We could contain this to the tlb-miss branch
9248 * by saving and restoring active stuff here.
9249 */
9250 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
9251 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9252#endif
9253
9254 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
9255 while the tlb-miss codepath will temporarily put it on the stack.
9256 Set the the type to stack here so we don't need to do it twice below. */
9257 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
9258 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
9259 /** @todo use a tmp register from TlbState, since they'll be free after tlb
9260 * lookup is done. */
9261
9262 /*
9263 * Define labels and allocate the result register (trying for the return
9264 * register if we can).
9265 */
9266 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
9267 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
9268 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
9269 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
9270 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, a_fFlat, a_cbMem);
9271 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
9272 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
9273 : UINT32_MAX;
9274
9275 /*
9276 * Jump to the TLB lookup code.
9277 */
9278 if (!TlbState.fSkip)
9279 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
9280
9281 /*
9282 * TlbMiss:
9283 *
9284 * Call helper to do the fetching.
9285 * We flush all guest register shadow copies here.
9286 */
9287 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
9288
9289#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
9290 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9291#else
9292 RT_NOREF(idxInstr);
9293#endif
9294
9295#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9296 /* Save variables in volatile registers. */
9297 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
9298 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
9299#endif
9300
9301 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
9302 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
9303#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9304 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
9305#else
9306 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9307#endif
9308
9309 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
9310 if RT_CONSTEXPR_IF(!a_fFlat)
9311 {
9312 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
9313 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
9314 }
9315
9316#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
9317 /* Do delayed EFLAGS calculations. */
9318 if RT_CONSTEXPR_IF(a_fFlat)
9319 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
9320 fHstRegsNotToSave);
9321 else
9322 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
9323 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG)>(pReNative, off, &TlbState,
9324 fHstRegsNotToSave);
9325#endif
9326
9327 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
9328 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
9329 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
9330
9331 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9332 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9333
9334 /* Done setting up parameters, make the call. */
9335 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
9336
9337 /*
9338 * Put the output in the right registers.
9339 */
9340 Assert(idxRegMemResult == pVarMem->idxReg);
9341 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
9342 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
9343
9344#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9345 /* Restore variables and guest shadow registers to volatile registers. */
9346 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
9347 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
9348#endif
9349
9350 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
9351 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
9352
9353#ifdef IEMNATIVE_WITH_TLB_LOOKUP
9354 if (!TlbState.fSkip)
9355 {
9356 /* end of tlbsmiss - Jump to the done label. */
9357 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
9358 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
9359
9360 /*
9361 * TlbLookup:
9362 */
9363 off = iemNativeEmitTlbLookup<true, a_cbMem, a_fAlignMaskAndCtl, a_fAccess>(pReNative, off, &TlbState, iSegReg,
9364 idxLabelTlbLookup, idxLabelTlbMiss,
9365 idxRegMemResult);
9366# ifdef IEM_WITH_TLB_STATISTICS
9367 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
9368 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
9369# endif
9370
9371 /* [idxVarUnmapInfo] = 0; */
9372 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
9373
9374 /*
9375 * TlbDone:
9376 */
9377 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
9378
9379 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
9380
9381# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9382 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
9383 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9384# endif
9385 }
9386#else
9387 RT_NOREF(idxLabelTlbMiss);
9388#endif
9389
9390 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9391 iemNativeVarRegisterRelease(pReNative, idxVarMem);
9392
9393 return off;
9394}
9395
9396
/*
 * Commit-and-unmap statements, one per access type.
 *
 * Each macro assigns 'off' the result of iemNativeEmitMemCommitAndUnmap,
 * handing it the unmap info variable, the helper function for the access
 * type, the current instruction index and the matching IEM_ACCESS_DATA_XXX
 * value (which strict builds use to check the helper).  The macros expect
 * 'pReNative', 'off' and 'pCallEntry' to be in scope where they are expanded.
 */
#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
    off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), \
                                         (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, \
                                         pCallEntry->idxInstr, IEM_ACCESS_DATA_ATOMIC)

#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
    off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), \
                                         (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, \
                                         pCallEntry->idxInstr, IEM_ACCESS_DATA_RW)

#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
    off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), \
                                         (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, \
                                         pCallEntry->idxInstr, IEM_ACCESS_DATA_W)

#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
    off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), \
                                         (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, \
                                         pCallEntry->idxInstr, IEM_ACCESS_DATA_R)
9412
9413DECL_INLINE_THROW(uint32_t)
9414iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
9415 uintptr_t pfnFunction, uint8_t idxInstr, uint32_t fAccess)
9416{
9417 /*
9418 * Assert sanity.
9419 */
9420 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9421#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
9422 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9423#endif
9424 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
9425 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
9426 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
9427#ifdef VBOX_STRICT
9428 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
9429 {
9430 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
9431 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
9432 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
9433 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
9434 case IEM_ACCESS_TYPE_WRITE:
9435 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
9436 case IEM_ACCESS_TYPE_READ:
9437 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
9438 default: AssertFailed();
9439 }
9440#else
9441 RT_NOREF(fAccess);
9442#endif
9443
9444 /*
9445 * To keep things simple we have to commit any pending writes first as we
9446 * may end up making calls (there shouldn't be any at this point, so this
9447 * is just for consistency).
9448 */
9449 /** @todo we could postpone this till we make the call and reload the
9450 * registers after returning from the call. Not sure if that's sensible or
9451 * not, though. */
9452 off = iemNativeRegFlushPendingWrites(pReNative, off);
9453
9454 /*
9455 * Move/spill/flush stuff out of call-volatile registers.
9456 *
9457 * We exclude any register holding the bUnmapInfo variable, as we'll be
9458 * checking it after returning from the call and will free it afterwards.
9459 */
9460 /** @todo save+restore active registers and maybe guest shadows in miss
9461 * scenario. */
9462 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
9463 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
9464
9465 /*
9466 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
9467 * to call the unmap helper function.
9468 *
9469 * The likelyhood of it being zero is higher than for the TLB hit when doing
9470 * the mapping, as a TLB miss for an well aligned and unproblematic memory
9471 * access should also end up with a mapping that won't need special unmapping.
9472 */
9473 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
9474 * should speed up things for the pure interpreter as well when TLBs
9475 * are enabled. */
9476#ifdef RT_ARCH_AMD64
9477 if (pVarUnmapInfo->idxReg == UINT8_MAX)
9478 {
9479 /* test byte [rbp - xxx], 0ffh */
9480 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9481 pbCodeBuf[off++] = 0xf6;
9482 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
9483 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9484 pbCodeBuf[off++] = 0xff;
9485 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9486 }
9487 else
9488#endif
9489 {
9490 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInitedWithPref(pReNative, idxVarUnmapInfo, &off,
9491 IEMNATIVE_CALL_ARG1_GREG);
9492 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
9493 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9494 }
9495 uint32_t const offJmpFixup = off;
9496 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
9497
9498 /*
9499 * Call the unmap helper function.
9500 */
9501#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
9502 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9503#else
9504 RT_NOREF(idxInstr);
9505#endif
9506
9507 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
9508 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
9509 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9510
9511 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9512 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9513
9514 /* Done setting up parameters, make the call.
9515 Note! Since we can only end up here if we took a TLB miss, any postponed EFLAGS
9516 calculations has been done there already. Thus, a_fSkipEflChecks = true. */
9517 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
9518
9519 /* The bUnmapInfo variable is implictly free by these MCs. */
9520 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
9521
9522 /*
9523 * Done, just fixup the jump for the non-call case.
9524 */
9525 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
9526
9527 return off;
9528}
9529
9530
9531
9532/*********************************************************************************************************************************
9533* State and Exceptions *
9534*********************************************************************************************************************************/
9535
9536#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9537#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9538
9539#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9540#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9541#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9542
9543#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9544#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9545#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9546
9547
9548DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
9549{
9550#ifndef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
9551 RT_NOREF(pReNative, fForChange);
9552#else
9553 if ( !(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED)
9554 && fForChange)
9555 {
9556# ifdef RT_ARCH_AMD64
9557
9558 /* Need to save the host MXCSR the first time, and clear the exception flags. */
9559 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9560 {
9561 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9562
9563 /* stmxcsr */
9564 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9565 pbCodeBuf[off++] = X86_OP_REX_B;
9566 pbCodeBuf[off++] = 0x0f;
9567 pbCodeBuf[off++] = 0xae;
9568 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9569 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9570 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9571 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9572 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9573 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9574
9575 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9576 }
9577
9578 uint8_t const idxRegTmp = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
9579 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
9580 kIemNativeGstRegUse_ReadOnly);
9581
9582 /*
9583 * Mask any exceptions and clear the exception status and save into MXCSR,
9584 * taking a detour through memory here because ldmxcsr/stmxcsr don't support
9585 * a register source/target (sigh).
9586 */
9587 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr);
9588 off = iemNativeEmitOrGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, X86_MXCSR_XCPT_MASK);
9589 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, ~X86_MXCSR_XCPT_FLAGS);
9590 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9591
9592 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9593
9594 /* ldmxcsr */
9595 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9596 pbCodeBuf[off++] = X86_OP_REX_B;
9597 pbCodeBuf[off++] = 0x0f;
9598 pbCodeBuf[off++] = 0xae;
9599 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9600 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9601 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9602 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9603 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9604 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9605
9606 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9607 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9608
9609# elif defined(RT_ARCH_ARM64)
9610 uint8_t const idxRegTmp = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
9611
9612 /* Need to save the host floating point control register the first time, clear FPSR. */
9613 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9614 {
9615 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9616 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);
9617 pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9618 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9619 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9620 }
9621
9622 /*
9623 * Translate MXCSR to FPCR.
9624 *
9625 * Unfortunately we can't emulate the exact behavior of MXCSR as we can't take
9626 * FEAT_AFP on arm64 for granted (My M2 Macbook doesn't has it). So we can't map
9627 * MXCSR.DAZ to FPCR.FIZ and MXCSR.FZ to FPCR.FZ with FPCR.AH being set.
9628 * We can only use FPCR.FZ which will flush inputs _and_ output de-normals to zero.
9629 */
9630 /** @todo Check the host supported flags (needs additional work to get the host features from CPUM)
9631 * and implement alternate handling if FEAT_AFP is present. */
9632 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
9633 kIemNativeGstRegUse_ReadOnly);
9634
9635 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
9636
9637 /* First make sure that there is nothing set for the upper 16-bits (X86_MXCSR_MM, which we don't emulate right now). */
9638 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegTmp, idxRegMxCsr);
9639
9640 /* If either MXCSR.FZ or MXCSR.DAZ is set FPCR.FZ will be set. */
9641 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(IEMNATIVE_REG_FIXED_TMP0, idxRegTmp, X86_MXCSR_DAZ_BIT, 1);
9642 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, X86_MXCSR_FZ_BIT);
9643 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(idxRegTmp, idxRegTmp, IEMNATIVE_REG_FIXED_TMP0);
9644 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegTmp, idxRegTmp, ARMV8_FPCR_FZ_BIT);
9645
9646 /*
9647 * Init the rounding mode, the layout differs between MXCSR.RM[14:13] and FPCR.RMode[23:22]:
9648 *
9649 * Value MXCSR FPCR
9650 * 0 RN RN
9651 * 1 R- R+
9652 * 2 R+ R-
9653 * 3 RZ RZ
9654 *
9655 * Conversion can be achieved by switching bit positions
9656 */
9657 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT);
9658 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 14, 1);
9659 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT + 1);
9660 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 13, 1);
9661
9662 /* Write the value to FPCR. */
9663 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9664
9665 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9666 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9667 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9668# else
9669# error "Port me"
9670# endif
9671 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
9672 }
9673#endif
9674 return off;
9675}
9676
9677
9678
9679/*********************************************************************************************************************************
9680* Emitters for FPU related operations. *
9681*********************************************************************************************************************************/
9682
9683#define IEM_MC_FETCH_FCW(a_u16Fcw) \
9684 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
9685
9686/** Emits code for IEM_MC_FETCH_FCW. */
9687DECL_INLINE_THROW(uint32_t)
9688iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9689{
9690 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9691 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9692
9693 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9694
9695 /* Allocate a temporary FCW register. */
9696 /** @todo eliminate extra register */
9697 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
9698 kIemNativeGstRegUse_ReadOnly);
9699
9700 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
9701
9702 /* Free but don't flush the FCW register. */
9703 iemNativeRegFreeTmp(pReNative, idxFcwReg);
9704 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9705
9706 return off;
9707}
9708
9709
9710#define IEM_MC_FETCH_FSW(a_u16Fsw) \
9711 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
9712
9713/** Emits code for IEM_MC_FETCH_FSW. */
9714DECL_INLINE_THROW(uint32_t)
9715iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9716{
9717 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9718 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9719
9720 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9721 /* Allocate a temporary FSW register. */
9722 /** @todo eliminate extra register */
9723 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
9724 kIemNativeGstRegUse_ReadOnly);
9725
9726 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
9727
9728 /* Free but don't flush the FSW register. */
9729 iemNativeRegFreeTmp(pReNative, idxFswReg);
9730 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9731
9732 return off;
9733}
9734
9735
9736
9737/*********************************************************************************************************************************
9738* Emitters for SSE/AVX specific operations. *
9739*********************************************************************************************************************************/
9740
9741#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
9742 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
9743
9744/** Emits code for IEM_MC_COPY_XREG_U128. */
9745DECL_INLINE_THROW(uint32_t)
9746iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
9747{
9748 /* This is a nop if the source and destination register are the same. */
9749 if (iXRegDst != iXRegSrc)
9750 {
9751 /* Allocate destination and source register. */
9752 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
9753 kIemNativeGstSimdRegLdStSz_Low128,
9754 kIemNativeGstRegUse_ForFullWrite);
9755 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
9756 kIemNativeGstSimdRegLdStSz_Low128,
9757 kIemNativeGstRegUse_ReadOnly);
9758
9759 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9760
9761 /* Free but don't flush the source and destination register. */
9762 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9763 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9764 }
9765
9766 return off;
9767}
9768
9769
9770#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
9771 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
9772
9773/** Emits code for IEM_MC_FETCH_XREG_U128. */
9774DECL_INLINE_THROW(uint32_t)
9775iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
9776{
9777 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9778 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9779
9780 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9781 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9782
9783 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9784
9785 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9786
9787 /* Free but don't flush the source register. */
9788 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9789 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9790
9791 return off;
9792}
9793
9794
9795#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
9796 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
9797
9798#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
9799 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
9800
9801/** Emits code for IEM_MC_FETCH_XREG_U64. */
9802DECL_INLINE_THROW(uint32_t)
9803iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
9804{
9805 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9806 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9807
9808 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9809 kIemNativeGstSimdRegLdStSz_Low128,
9810 kIemNativeGstRegUse_ReadOnly);
9811
9812 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9813 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9814
9815 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9816
9817 /* Free but don't flush the source register. */
9818 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9819 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9820
9821 return off;
9822}
9823
9824
9825#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
9826 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
9827
9828#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
9829 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
9830
9831/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
9832DECL_INLINE_THROW(uint32_t)
9833iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
9834{
9835 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9836 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9837
9838 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9839 kIemNativeGstSimdRegLdStSz_Low128,
9840 kIemNativeGstRegUse_ReadOnly);
9841
9842 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9843 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9844
9845 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9846
9847 /* Free but don't flush the source register. */
9848 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9849 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9850
9851 return off;
9852}
9853
9854
9855#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
9856 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
9857
9858/** Emits code for IEM_MC_FETCH_XREG_U16. */
9859DECL_INLINE_THROW(uint32_t)
9860iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
9861{
9862 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9863 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9864
9865 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9866 kIemNativeGstSimdRegLdStSz_Low128,
9867 kIemNativeGstRegUse_ReadOnly);
9868
9869 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9870 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9871
9872 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
9873
9874 /* Free but don't flush the source register. */
9875 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9876 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9877
9878 return off;
9879}
9880
9881
9882#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
9883 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
9884
9885/** Emits code for IEM_MC_FETCH_XREG_U8. */
9886DECL_INLINE_THROW(uint32_t)
9887iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
9888{
9889 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9890 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
9891
9892 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9893 kIemNativeGstSimdRegLdStSz_Low128,
9894 kIemNativeGstRegUse_ReadOnly);
9895
9896 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9897 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9898
9899 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
9900
9901 /* Free but don't flush the source register. */
9902 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9903 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9904
9905 return off;
9906}
9907
9908
9909#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
9910 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
9911
9912AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
9913#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
9914 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
9915
9916
9917/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9918DECL_INLINE_THROW(uint32_t)
9919iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9920{
9921 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9922 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9923
9924 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9925 kIemNativeGstSimdRegLdStSz_Low128,
9926 kIemNativeGstRegUse_ForFullWrite);
9927 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9928
9929 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9930
9931 /* Free but don't flush the source register. */
9932 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9933 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9934
9935 return off;
9936}
9937
9938
9939#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
9940 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
9941
9942#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
9943 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
9944
9945#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
9946 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
9947
9948#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
9949 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
9950
9951#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
9952 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
9953
9954#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
9955 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
9956
9957/** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8. */
9958DECL_INLINE_THROW(uint32_t)
9959iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
9960 uint8_t cbLocal, uint8_t iElem)
9961{
9962 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9963 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
9964
9965#ifdef VBOX_STRICT
9966 switch (cbLocal)
9967 {
9968 case sizeof(uint64_t): Assert(iElem < 2); break;
9969 case sizeof(uint32_t): Assert(iElem < 4); break;
9970 case sizeof(uint16_t): Assert(iElem < 8); break;
9971 case sizeof(uint8_t): Assert(iElem < 16); break;
9972 default: AssertFailed();
9973 }
9974#endif
9975
9976 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9977 kIemNativeGstSimdRegLdStSz_Low128,
9978 kIemNativeGstRegUse_ForUpdate);
9979 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
9980
9981 switch (cbLocal)
9982 {
9983 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9984 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9985 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9986 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9987 default: AssertFailed();
9988 }
9989
9990 /* Free but don't flush the source register. */
9991 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9992 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9993
9994 return off;
9995}
9996
9997
9998#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
9999 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
10000
10001/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
10002DECL_INLINE_THROW(uint32_t)
10003iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10004{
10005 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10006 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10007
10008 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10009 kIemNativeGstSimdRegLdStSz_Low128,
10010 kIemNativeGstRegUse_ForUpdate);
10011 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
10012
10013 /* Zero the vector register first, then store the 64-bit value to the lower 64-bit. */
10014 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10015 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10016
10017 /* Free but don't flush the source register. */
10018 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10019 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10020
10021 return off;
10022}
10023
10024
10025#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
10026 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
10027
10028/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
10029DECL_INLINE_THROW(uint32_t)
10030iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10031{
10032 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10033 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10034
10035 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10036 kIemNativeGstSimdRegLdStSz_Low128,
10037 kIemNativeGstRegUse_ForUpdate);
10038 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
10039
10040 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
10041 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10042 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10043
10044 /* Free but don't flush the source register. */
10045 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10046 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10047
10048 return off;
10049}
10050
10051
10052#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
10053 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
10054
10055/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
10056DECL_INLINE_THROW(uint32_t)
10057iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
10058 uint8_t idxSrcVar, uint8_t iDwSrc)
10059{
10060 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10061 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10062
10063 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10064 kIemNativeGstSimdRegLdStSz_Low128,
10065 kIemNativeGstRegUse_ForUpdate);
10066 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10067
10068 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
10069 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
10070
10071 /* Free but don't flush the destination register. */
10072 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10073 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10074
10075 return off;
10076}
10077
10078
10079#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10080 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10081
10082/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
10083DECL_INLINE_THROW(uint32_t)
10084iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10085{
10086 /*
10087 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10088 * if iYRegDst gets allocated first for the full write it won't load the
10089 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10090 * duplicated from the already allocated host register for iYRegDst containing
10091 * garbage. This will be catched by the guest register value checking in debug
10092 * builds.
10093 */
10094 if (iYRegDst != iYRegSrc)
10095 {
10096 /* Allocate destination and source register. */
10097 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10098 kIemNativeGstSimdRegLdStSz_256,
10099 kIemNativeGstRegUse_ForFullWrite);
10100 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10101 kIemNativeGstSimdRegLdStSz_Low128,
10102 kIemNativeGstRegUse_ReadOnly);
10103
10104 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10105 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10106
10107 /* Free but don't flush the source and destination register. */
10108 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10109 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10110 }
10111 else
10112 {
10113 /* This effectively only clears the upper 128-bits of the register. */
10114 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10115 kIemNativeGstSimdRegLdStSz_High128,
10116 kIemNativeGstRegUse_ForFullWrite);
10117
10118 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10119
10120 /* Free but don't flush the destination register. */
10121 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10122 }
10123
10124 return off;
10125}
10126
10127
10128#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10129 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10130
10131/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
10132DECL_INLINE_THROW(uint32_t)
10133iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10134{
10135 /*
10136 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10137 * if iYRegDst gets allocated first for the full write it won't load the
10138 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10139 * duplicated from the already allocated host register for iYRegDst containing
10140 * garbage. This will be catched by the guest register value checking in debug
10141 * builds. iYRegSrc == iYRegDst would effectively only clear any upper 256-bits
10142 * for a zmm register we don't support yet, so this is just a nop.
10143 */
10144 if (iYRegDst != iYRegSrc)
10145 {
10146 /* Allocate destination and source register. */
10147 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10148 kIemNativeGstSimdRegLdStSz_256,
10149 kIemNativeGstRegUse_ReadOnly);
10150 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10151 kIemNativeGstSimdRegLdStSz_256,
10152 kIemNativeGstRegUse_ForFullWrite);
10153
10154 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10155
10156 /* Free but don't flush the source and destination register. */
10157 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10158 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10159 }
10160
10161 return off;
10162}
10163
10164
10165#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
10166 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
10167
10168/** Emits code for IEM_MC_FETCH_YREG_U128. */
10169DECL_INLINE_THROW(uint32_t)
10170iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
10171{
10172 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10173 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
10174
10175 Assert(iDQWord <= 1);
10176 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10177 iDQWord == 1
10178 ? kIemNativeGstSimdRegLdStSz_High128
10179 : kIemNativeGstSimdRegLdStSz_Low128,
10180 kIemNativeGstRegUse_ReadOnly);
10181
10182 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10183 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10184
10185 if (iDQWord == 1)
10186 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10187 else
10188 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10189
10190 /* Free but don't flush the source register. */
10191 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10192 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10193
10194 return off;
10195}
10196
10197
10198#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
10199 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
10200
10201/** Emits code for IEM_MC_FETCH_YREG_U64. */
10202DECL_INLINE_THROW(uint32_t)
10203iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
10204{
10205 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10206 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10207
10208 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10209 iQWord >= 2
10210 ? kIemNativeGstSimdRegLdStSz_High128
10211 : kIemNativeGstSimdRegLdStSz_Low128,
10212 kIemNativeGstRegUse_ReadOnly);
10213
10214 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10215 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10216
10217 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
10218
10219 /* Free but don't flush the source register. */
10220 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10221 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10222
10223 return off;
10224}
10225
10226
10227#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
10228 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
10229
10230/** Emits code for IEM_MC_FETCH_YREG_U32. */
10231DECL_INLINE_THROW(uint32_t)
10232iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
10233{
10234 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10235 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10236
10237 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10238 iDWord >= 4
10239 ? kIemNativeGstSimdRegLdStSz_High128
10240 : kIemNativeGstSimdRegLdStSz_Low128,
10241 kIemNativeGstRegUse_ReadOnly);
10242
10243 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10244 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10245
10246 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
10247
10248 /* Free but don't flush the source register. */
10249 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10250 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10251
10252 return off;
10253}
10254
10255
10256#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
10257 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
10258
10259/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
10260DECL_INLINE_THROW(uint32_t)
10261iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10262{
10263 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10264 kIemNativeGstSimdRegLdStSz_High128,
10265 kIemNativeGstRegUse_ForFullWrite);
10266
10267 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10268
10269 /* Free but don't flush the register. */
10270 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10271
10272 return off;
10273}
10274
10275
10276#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
10277 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
10278
10279/** Emits code for IEM_MC_STORE_YREG_U128. */
10280DECL_INLINE_THROW(uint32_t)
10281iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
10282{
10283 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10284 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10285
10286 Assert(iDQword <= 1);
10287 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10288 iDQword == 0
10289 ? kIemNativeGstSimdRegLdStSz_Low128
10290 : kIemNativeGstSimdRegLdStSz_High128,
10291 kIemNativeGstRegUse_ForFullWrite);
10292
10293 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10294
10295 if (iDQword == 0)
10296 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10297 else
10298 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
10299
10300 /* Free but don't flush the source register. */
10301 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10302 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10303
10304 return off;
10305}
10306
10307
10308#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10309 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10310
10311/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
10312DECL_INLINE_THROW(uint32_t)
10313iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10314{
10315 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10316 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10317
10318 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10319 kIemNativeGstSimdRegLdStSz_256,
10320 kIemNativeGstRegUse_ForFullWrite);
10321
10322 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10323
10324 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10325 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10326
10327 /* Free but don't flush the source register. */
10328 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10329 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10330
10331 return off;
10332}
10333
10334
10335#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
10336 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
10337
10338/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
10339DECL_INLINE_THROW(uint32_t)
10340iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10341{
10342 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10343 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10344
10345 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10346 kIemNativeGstSimdRegLdStSz_256,
10347 kIemNativeGstRegUse_ForFullWrite);
10348
10349 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10350
10351 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10352 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10353
10354 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10355 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10356
10357 return off;
10358}
10359
10360
10361#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
10362 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
10363
10364/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
10365DECL_INLINE_THROW(uint32_t)
10366iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10367{
10368 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10369 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10370
10371 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10372 kIemNativeGstSimdRegLdStSz_256,
10373 kIemNativeGstRegUse_ForFullWrite);
10374
10375 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10376
10377 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10378 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10379
10380 /* Free but don't flush the source register. */
10381 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10382 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10383
10384 return off;
10385}
10386
10387
10388#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
10389 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
10390
10391/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
10392DECL_INLINE_THROW(uint32_t)
10393iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10394{
10395 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10396 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10397
10398 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10399 kIemNativeGstSimdRegLdStSz_256,
10400 kIemNativeGstRegUse_ForFullWrite);
10401
10402 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10403
10404 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10405 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10406
10407 /* Free but don't flush the source register. */
10408 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10409 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10410
10411 return off;
10412}
10413
10414
10415#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
10416 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
10417
10418/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
10419DECL_INLINE_THROW(uint32_t)
10420iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10421{
10422 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10423 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10424
10425 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10426 kIemNativeGstSimdRegLdStSz_256,
10427 kIemNativeGstRegUse_ForFullWrite);
10428
10429 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10430
10431 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10432 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10433
10434 /* Free but don't flush the source register. */
10435 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10436 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10437
10438 return off;
10439}
10440
10441
10442#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
10443 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
10444
10445/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
10446DECL_INLINE_THROW(uint32_t)
10447iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10448{
10449 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10450 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10451
10452 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10453 kIemNativeGstSimdRegLdStSz_256,
10454 kIemNativeGstRegUse_ForFullWrite);
10455
10456 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10457
10458 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10459
10460 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10461 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10462
10463 return off;
10464}
10465
10466
10467#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
10468 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
10469
10470/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
10471DECL_INLINE_THROW(uint32_t)
10472iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10473{
10474 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10475 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10476
10477 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10478 kIemNativeGstSimdRegLdStSz_256,
10479 kIemNativeGstRegUse_ForFullWrite);
10480
10481 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10482
10483 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10484
10485 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10486 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10487
10488 return off;
10489}
10490
10491
10492#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10493 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10494
10495/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
10496DECL_INLINE_THROW(uint32_t)
10497iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10498{
10499 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10500 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10501
10502 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10503 kIemNativeGstSimdRegLdStSz_256,
10504 kIemNativeGstRegUse_ForFullWrite);
10505
10506 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10507
10508 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10509
10510 /* Free but don't flush the source register. */
10511 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10512 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10513
10514 return off;
10515}
10516
10517
10518#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10519 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10520
10521/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
10522DECL_INLINE_THROW(uint32_t)
10523iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10524{
10525 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10526 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10527
10528 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10529 kIemNativeGstSimdRegLdStSz_256,
10530 kIemNativeGstRegUse_ForFullWrite);
10531
10532 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10533
10534 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10535
10536 /* Free but don't flush the source register. */
10537 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10538 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10539
10540 return off;
10541}
10542
10543
10544#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10545 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10546
10547/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
10548DECL_INLINE_THROW(uint32_t)
10549iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10550{
10551 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10552 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10553
10554 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10555 kIemNativeGstSimdRegLdStSz_256,
10556 kIemNativeGstRegUse_ForFullWrite);
10557
10558 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10559
10560 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
10561
10562 /* Free but don't flush the source register. */
10563 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10564 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10565
10566 return off;
10567}
10568
10569
10570#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10571 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10572
10573/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
10574DECL_INLINE_THROW(uint32_t)
10575iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10576{
10577 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10578 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10579
10580 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10581 kIemNativeGstSimdRegLdStSz_256,
10582 kIemNativeGstRegUse_ForFullWrite);
10583
10584 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10585
10586 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10587 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
10588
10589 /* Free but don't flush the source register. */
10590 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10591 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10592
10593 return off;
10594}
10595
10596
10597#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10598 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10599
10600/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
10601DECL_INLINE_THROW(uint32_t)
10602iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10603{
10604 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10605 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10606
10607 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10608 kIemNativeGstSimdRegLdStSz_256,
10609 kIemNativeGstRegUse_ForFullWrite);
10610
10611 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10612
10613 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10614 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10615
10616 /* Free but don't flush the source register. */
10617 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10618 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10619
10620 return off;
10621}
10622
10623
10624#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
10625 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
10626
10627/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
10628DECL_INLINE_THROW(uint32_t)
10629iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
10630{
10631 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10632 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10633
10634 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10635 kIemNativeGstSimdRegLdStSz_256,
10636 kIemNativeGstRegUse_ForFullWrite);
10637 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10638 kIemNativeGstSimdRegLdStSz_Low128,
10639 kIemNativeGstRegUse_ReadOnly);
10640 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10641
10642 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10643 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10644 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10645
10646 /* Free but don't flush the source and destination registers. */
10647 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10648 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10649 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10650
10651 return off;
10652}
10653
10654
10655#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
10656 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
10657
10658/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
10659DECL_INLINE_THROW(uint32_t)
10660iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
10661{
10662 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10663 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10664
10665 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10666 kIemNativeGstSimdRegLdStSz_256,
10667 kIemNativeGstRegUse_ForFullWrite);
10668 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10669 kIemNativeGstSimdRegLdStSz_Low128,
10670 kIemNativeGstRegUse_ReadOnly);
10671 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10672
10673 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10674 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
10675 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10676
10677 /* Free but don't flush the source and destination registers. */
10678 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10679 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10680 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10681
10682 return off;
10683}
10684
10685
10686#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
10687 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
10688
10689
10690/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
10691DECL_INLINE_THROW(uint32_t)
10692iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
10693{
10694 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10695 kIemNativeGstSimdRegLdStSz_Low128,
10696 kIemNativeGstRegUse_ForUpdate);
10697
10698 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
10699 if (bImm8Mask & RT_BIT(0))
10700 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
10701 if (bImm8Mask & RT_BIT(1))
10702 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
10703 if (bImm8Mask & RT_BIT(2))
10704 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
10705 if (bImm8Mask & RT_BIT(3))
10706 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
10707
10708 /* Free but don't flush the destination register. */
10709 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10710
10711 return off;
10712}
10713
10714
10715#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
10716 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
10717
10718#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
10719 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
10720
10721/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
10722DECL_INLINE_THROW(uint32_t)
10723iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
10724{
10725 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10726 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
10727
10728 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10729 kIemNativeGstSimdRegLdStSz_256,
10730 kIemNativeGstRegUse_ReadOnly);
10731 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10732
10733 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
10734
10735 /* Free but don't flush the source register. */
10736 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10737 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10738
10739 return off;
10740}
10741
10742
10743#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
10744 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
10745
10746#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
10747 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
10748
10749/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
10750DECL_INLINE_THROW(uint32_t)
10751iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
10752{
10753 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10754 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10755
10756 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10757 kIemNativeGstSimdRegLdStSz_256,
10758 kIemNativeGstRegUse_ForFullWrite);
10759 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitalized*/);
10760
10761 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
10762
10763 /* Free but don't flush the source register. */
10764 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10765 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10766
10767 return off;
10768}
10769
10770
10771#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
10772 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
10773
10774
10775/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
10776DECL_INLINE_THROW(uint32_t)
10777iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
10778 uint8_t idxSrcVar, uint8_t iDwSrc)
10779{
10780 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10781 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10782
10783 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10784 iDwDst < 4
10785 ? kIemNativeGstSimdRegLdStSz_Low128
10786 : kIemNativeGstSimdRegLdStSz_High128,
10787 kIemNativeGstRegUse_ForUpdate);
10788 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitalized*/);
10789 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10790
10791 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
10792 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
10793
10794 /* Free but don't flush the source register. */
10795 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10796 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10797 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10798
10799 return off;
10800}
10801
10802
10803#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
10804 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
10805
10806
10807/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
10808DECL_INLINE_THROW(uint32_t)
10809iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
10810 uint8_t idxSrcVar, uint8_t iQwSrc)
10811{
10812 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10813 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10814
10815 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10816 iQwDst < 2
10817 ? kIemNativeGstSimdRegLdStSz_Low128
10818 : kIemNativeGstSimdRegLdStSz_High128,
10819 kIemNativeGstRegUse_ForUpdate);
10820 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitalized*/);
10821 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10822
10823 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
10824 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
10825
10826 /* Free but don't flush the source register. */
10827 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10828 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10829 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10830
10831 return off;
10832}
10833
10834
10835#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
10836 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
10837
10838
10839/** Emits code for IEM_MC_STORE_YREG_U64. */
10840DECL_INLINE_THROW(uint32_t)
10841iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
10842{
10843 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10844 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10845
10846 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10847 iQwDst < 2
10848 ? kIemNativeGstSimdRegLdStSz_Low128
10849 : kIemNativeGstSimdRegLdStSz_High128,
10850 kIemNativeGstRegUse_ForUpdate);
10851
10852 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10853
10854 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
10855
10856 /* Free but don't flush the source register. */
10857 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10858 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10859
10860 return off;
10861}
10862
10863
10864#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
10865 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
10866
10867/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
10868DECL_INLINE_THROW(uint32_t)
10869iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10870{
10871 RT_NOREF(pReNative, iYReg);
10872 /** @todo Needs to be implemented when support for AVX-512 is added. */
10873 return off;
10874}
10875
10876
10877
10878/*********************************************************************************************************************************
10879* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
10880*********************************************************************************************************************************/
10881
10882/**
10883 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
10884 */
10885DECL_INLINE_THROW(uint32_t)
10886iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs, uint8_t idxInstr)
10887{
10888 /* Grab the MXCSR register, it must not be call volatile or we end up freeing it when setting up the call below. */
10889 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
10890 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
10891 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
10892
10893#if 0 /* This is not required right now as the called helper will set up the SSE/AVX state if it is an assembly one. */
10894 /*
10895 * Need to do the FPU preparation.
10896 */
10897 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
10898#endif
10899
10900 /*
10901 * Do all the call setup and cleanup.
10902 */
10903 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
10904 false /*fFlushPendingWrites*/);
10905
10906 /*
10907 * Load the MXCSR register into the first argument and mask out the current exception flags.
10908 */
10909 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
10910 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
10911
10912 /*
10913 * Make the call.
10914 */
10915 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnAImpl);
10916
10917 /*
10918 * The updated MXCSR is in the return register, update exception status flags.
10919 *
10920 * The return register is marked allocated as a temporary because it is required for the
10921 * exception generation check below.
10922 */
10923 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG)));
10924 uint8_t const idxRegTmp = iemNativeRegMarkAllocated(pReNative, IEMNATIVE_CALL_RET_GREG, kIemNativeWhat_Tmp);
10925 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
10926
10927#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
10928 /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
10929 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_MxCsr>(pReNative, off, idxRegMxCsr);
10930#endif
10931
10932 /*
10933 * Make sure we don't have any outstanding guest register writes as we may
10934 * raise an \#UD or \#XF and all guest register must be up to date in CPUMCTX.
10935 */
10936 off = iemNativeRegFlushPendingWrites(pReNative, off);
10937
10938#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10939 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10940#else
10941 RT_NOREF(idxInstr);
10942#endif
10943
10944 /** @todo r=aeichner ANDN from BMI1 would save us a temporary and additional instruction here but I don't
10945 * want to assume the existence for this instruction at the moment. */
10946 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
10947
10948 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp2, idxRegTmp);
10949 /* tmp &= X86_MXCSR_XCPT_MASK */
10950 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
10951 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
10952 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
10953 /* tmp = ~tmp */
10954 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
10955 /* tmp &= mxcsr */
10956 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegTmp2);
10957 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseSseAvxFpRelated>(pReNative, off, idxRegTmp,
10958 X86_MXCSR_XCPT_FLAGS);
10959
10960 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
10961 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10962 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
10963
10964 return off;
10965}
10966
10967
10968#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
10969 off = iemNativeEmitCallSseAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
10970
10971/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
10972DECL_INLINE_THROW(uint32_t)
10973iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10974{
10975 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10976 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10977 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
10978}
10979
10980
10981#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
10982 off = iemNativeEmitCallSseAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
10983
10984/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
10985DECL_INLINE_THROW(uint32_t)
10986iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
10987 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10988{
10989 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10990 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10991 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10992 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
10993}
10994
10995
10996/*********************************************************************************************************************************
10997* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
10998*********************************************************************************************************************************/
10999
11000#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
11001 off = iemNativeEmitCallAvxAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
11002
11003/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
11004DECL_INLINE_THROW(uint32_t)
11005iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
11006{
11007 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11008 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11009 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
11010}
11011
11012
11013#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
11014 off = iemNativeEmitCallAvxAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
11015
11016/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
11017DECL_INLINE_THROW(uint32_t)
11018iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
11019 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
11020{
11021 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11022 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11023 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11024 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
11025}
11026
11027
11028
11029/*********************************************************************************************************************************
11030* Include instruction emitters. *
11031*********************************************************************************************************************************/
11032#include "target-x86/IEMAllN8veEmit-x86.h"
11033
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette