VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h @ 106319

Last change on this file was r106319, checked in by vboxsync: VMM/IEM: Reduced parameter count for iemNativeRegAllocTmp*. bugref:10720
1/* $Id: IEMAllN8veRecompFuncs.h 106319 2024-10-15 08:50:24Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62#include "target-x86/IEMAllN8veEmit-x86.h"
63
64
65/*
66 * Narrow down configs here to avoid wasting time on unused configs.
67 * Note! Same checks in IEMAllThrdRecompiler.cpp.
68 */
69
70#ifndef IEM_WITH_CODE_TLB
71# error The code TLB must be enabled for the recompiler.
72#endif
73
74#ifndef IEM_WITH_DATA_TLB
75# error The data TLB must be enabled for the recompiler.
76#endif
77
78#ifndef IEM_WITH_SETJMP
79# error The setjmp approach must be enabled for the recompiler.
80#endif
81
82#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
83# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
84#endif
85
86
87/*********************************************************************************************************************************
88* Code emitters for flushing pending guest register writes and sanity checks *
89*********************************************************************************************************************************/
90
91#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
92
93# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
94/**
95 * Updates IEMCPU::uPcUpdatingDebug.
96 */
97DECL_INLINE_THROW(uint32_t) iemNativeEmitPcDebugAdd(PIEMRECOMPILERSTATE pReNative, uint32_t off, int64_t offDisp, uint8_t cBits)
98{
99# ifdef RT_ARCH_AMD64
100 if (pReNative->Core.fDebugPcInitialized && cBits >= 32)
101 {
102 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
103 if ((int32_t)offDisp == offDisp || cBits != 64)
104 {
105 /* add [q]word [pVCpu->iem.s.uPcUpdatingDebug], imm32/imm8 */
106 if (cBits == 64)
107 pCodeBuf[off++] = X86_OP_REX_W;
108 pCodeBuf[off++] = (int8_t)offDisp == offDisp ? 0x83 : 0x81;
109 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
110 if ((int8_t)offDisp == offDisp)
111 pCodeBuf[off++] = (int8_t)offDisp;
112 else
113 {
114 *(int32_t *)&pCodeBuf[off] = (int32_t)offDisp;
115 off += sizeof(int32_t);
116 }
117 }
118 else
119 {
120 /* mov tmp0, imm64 */
121 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, offDisp);
122
123 /* add [pVCpu->iem.s.uPcUpdatingDebug], tmp0 */
124 if (cBits == 64)
125 pCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 >= 8 ? X86_OP_REX_R : 0);
126 else if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
127 pCodeBuf[off++] = X86_OP_REX_R;
128 pCodeBuf[off++] = 0x01;
129 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0 & 7,
130 RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
131 }
132 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
133 return off;
134 }
135# endif
136
137 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
138 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 12);
139
140 if (pReNative->Core.fDebugPcInitialized)
141 {
142 Log4(("uPcUpdatingDebug+=%ld cBits=%d off=%#x\n", offDisp, cBits, off));
143 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
144 }
145 else
146 {
147 Log4(("uPcUpdatingDebug=rip+%ld cBits=%d off=%#x\n", offDisp, cBits, off));
148 pReNative->Core.fDebugPcInitialized = true;
149 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
150 }
151
152 if (cBits == 64)
153 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxTmpReg, offDisp, IEMNATIVE_REG_FIXED_TMP0);
154 else
155 {
156 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxTmpReg, (int32_t)offDisp, IEMNATIVE_REG_FIXED_TMP0);
157 if (cBits == 16)
158 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, UINT16_MAX);
159 }
160
161 off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug),
162 IEMNATIVE_REG_FIXED_TMP0);
163
164 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
165 iemNativeRegFreeTmp(pReNative, idxTmpReg);
166 return off;
167}
168
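/* Rough C-level model of what the generic path of the emitter above computes. The helper
   name and its pfInitialized parameter are illustrative only; the real code checks
   pReNative->Core.fDebugPcInitialized at recompile time and emits the add directly. */
#if 0
static void iemPcUpdatingDebugAddSketch(PVMCPUCC pVCpu, bool *pfInitialized, int64_t offDisp, uint8_t cBits)
{
    /* The first use seeds the debug copy from the guest RIP; later uses accumulate into it. */
    uint64_t uPc = *pfInitialized ? pVCpu->iem.s.uPcUpdatingDebug : pVCpu->cpum.GstCtx.rip;
    *pfInitialized = true;
    if (cBits == 64)
        uPc += (uint64_t)offDisp;
    else
    {
        uPc = (uint32_t)(uPc + (uint64_t)offDisp);  /* the 32-bit add clears the upper half */
        if (cBits == 16)
            uPc &= UINT16_MAX;
    }
    pVCpu->iem.s.uPcUpdatingDebug = uPc;
}
#endif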
169
170# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
171DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
172{
173 /* Compare the shadow with the context value, they should match. */
174 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
175 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
176 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
177 return off;
178}
179# endif
180
181#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
182
183/**
184 * Flushes delayed write of a specific guest register.
185 *
186 * This must be called prior to calling CImpl functions and any helpers that use
187 * the guest state (like raising exceptions) and such.
188 *
189 * This optimization has not yet been implemented. The first target would be
190 * RIP updates, since these are the most common ones.
191 */
192DECL_INLINE_THROW(uint32_t)
193iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
194{
195#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
196 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
197#endif
198
199#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
200#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
201 if ( enmClass == kIemNativeGstRegRef_EFlags
202 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
203 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
204#else
205 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
206#endif
207
208 if ( enmClass == kIemNativeGstRegRef_Gpr
209 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
210 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
211#endif
212
213#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
214 if ( enmClass == kIemNativeGstRegRef_XReg
215 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
216 {
217 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
218 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now, that the referenced register doesn't change). */
219 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
220
221 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
222 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
223 }
224#endif
225 RT_NOREF(pReNative, enmClass, idxReg);
226 return off;
227}
228
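/* Hypothetical call-site sketch: flush the pending shadow write of a referenced GPR before
   emitting a call into a C helper that inspects CPUMCTX (the register index is made up). */
#if 0
    off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, X86_GREG_xCX);
    /* ... emit the helper call here; CPUMCTX now holds the up-to-date RCX value ... */
#endif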
229
230
231/*********************************************************************************************************************************
232* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
233*********************************************************************************************************************************/
234
235#undef IEM_MC_BEGIN /* unused */
236#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
237 { \
238 Assert(pReNative->Core.bmVars == 0); \
239 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
240 Assert(pReNative->Core.bmStack == 0); \
241 pReNative->fMc = (a_fMcFlags); \
242 pReNative->fCImpl = (a_fCImplFlags); \
243 pReNative->cArgsX = (a_cArgsIncludingHidden)
244
245/** We have to get to the end in recompilation mode, as otherwise we won't
246 * generate code for all the IEM_MC_IF_XXX branches. */
247#define IEM_MC_END() \
248 iemNativeVarFreeAll(pReNative); \
249 } return off
250
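/* Hand-expanded sketch of what an empty IEM_MC_BEGIN_EX(0, 0, 0) ... IEM_MC_END() pair
   produces with the definitions above; the emitter statements of a real MC body would sit
   between the two halves. */
#if 0
    {
        Assert(pReNative->Core.bmVars == 0);
        Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
        Assert(pReNative->Core.bmStack == 0);
        pReNative->fMc    = 0;
        pReNative->fCImpl = 0;
        pReNative->cArgsX = 0;
        /* ... IEM_MC_XXX emitter statements ... */
        iemNativeVarFreeAll(pReNative);
    } return off;
#endif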
251
252
253/*********************************************************************************************************************************
254* Liveness Stubs *
255*********************************************************************************************************************************/
256
257#define IEM_MC_LIVENESS_GREG_INPUT(a_iGReg) ((void)0)
258#define IEM_MC_LIVENESS_GREG_CLOBBER(a_iGReg) ((void)0)
259#define IEM_MC_LIVENESS_GREG_MODIFY(a_iGReg) ((void)0)
260
261#define IEM_MC_LIVENESS_MREG_INPUT(a_iMReg) ((void)0)
262#define IEM_MC_LIVENESS_MREG_CLOBBER(a_iMReg) ((void)0)
263#define IEM_MC_LIVENESS_MREG_MODIFY(a_iMReg) ((void)0)
264
265#define IEM_MC_LIVENESS_XREG_INPUT(a_iXReg) ((void)0)
266#define IEM_MC_LIVENESS_XREG_CLOBBER(a_iXReg) ((void)0)
267#define IEM_MC_LIVENESS_XREG_MODIFY(a_iXReg) ((void)0)
268
269#define IEM_MC_LIVENESS_MXCSR_INPUT() ((void)0)
270#define IEM_MC_LIVENESS_MXCSR_CLOBBER() ((void)0)
271#define IEM_MC_LIVENESS_MXCSR_MODIFY() ((void)0)
272
273
274/*********************************************************************************************************************************
275* Native Emitter Support. *
276*********************************************************************************************************************************/
277
278#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
279
280#define IEM_MC_NATIVE_ELSE() } else {
281
282#define IEM_MC_NATIVE_ENDIF() } ((void)0)
283
284
285#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
286 off = a_fnEmitter(pReNative, off)
287
288#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
289 off = a_fnEmitter(pReNative, off, (a0))
290
291#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
292 off = a_fnEmitter(pReNative, off, (a0), (a1))
293
294#define IEM_MC_NATIVE_EMIT_2_EX(a_fnEmitter, a0, a1) \
295 off = a_fnEmitter(pReNative, off, pCallEntry->idxInstr, (a0), (a1))
296
297#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
298 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
299
300#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
301 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
302
303#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
304 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
305
306#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
307 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
308
309#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
310 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
311
312#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
313 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
314
315
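/* Hypothetical usage sketch of the IEM_MC_NATIVE_* wrappers above; the emitter function,
   the variable indexes and the ARM64 arch-value constant are assumptions for illustration. */
#if 0
    IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
        IEM_MC_NATIVE_EMIT_2(iemNativeEmit_SomeBinOp, idxVarDst, idxVarSrc);
    IEM_MC_NATIVE_ELSE()
        /* ... fall back to the threaded/C-implementation path ... */
    IEM_MC_NATIVE_ENDIF();
#endif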
316#ifndef RT_ARCH_AMD64
317# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
318#else
319/** @note This is a naive approach that ASSUMES that the register isn't
320 * allocated, so it only works safely for the first allocation(s) in
321 * an MC block. */
322# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
323 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
324
325DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg,
326 uint32_t off, bool fAllocated);
327
328DECL_INLINE_THROW(uint32_t)
329iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
330{
331 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
332 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
333 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
334
335# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
336 /* Must flush the register if it holds pending writes. */
337 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
338 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
339 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
340# endif
341
342 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off, false /*fAllocated*/);
343 return off;
344}
345
346#endif /* RT_ARCH_AMD64 */
347
348
349
350/*********************************************************************************************************************************
351* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX)                                              *
352*********************************************************************************************************************************/
353
354#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
355 pReNative->fMc = 0; \
356 pReNative->fCImpl = (a_fFlags); \
357 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
358 a_cbInstr) /** @todo not used ... */
359
360
361#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
362 pReNative->fMc = 0; \
363 pReNative->fCImpl = (a_fFlags); \
364 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
365
366DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
367 uint8_t idxInstr, uint64_t a_fGstShwFlush,
368 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
369{
370 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
371}
372
373
374#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
375 pReNative->fMc = 0; \
376 pReNative->fCImpl = (a_fFlags); \
377 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
378 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
379
380DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
381 uint8_t idxInstr, uint64_t a_fGstShwFlush,
382 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
383{
384 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
385}
386
387
388#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
389 pReNative->fMc = 0; \
390 pReNative->fCImpl = (a_fFlags); \
391 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
392 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
393
394DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
395 uint8_t idxInstr, uint64_t a_fGstShwFlush,
396 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
397 uint64_t uArg2)
398{
399 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
400}
401
402
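/* Hand-expanded sketch of IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags,
   a_fGstShwFlush, a_pfnCImpl, a0) as it lands in a threaded-call recompiler function body;
   the a_* names are the macro parameters, not concrete values. */
#if 0
    pReNative->fMc    = 0;
    pReNative->fCImpl = (a_fFlags);
    return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush,
                                   (uintptr_t)a_pfnCImpl, a_cbInstr, a0);
#endif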
403
404/*********************************************************************************************************************************
405* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
406*********************************************************************************************************************************/
407
408/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
409 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
410DECL_INLINE_THROW(uint32_t)
411iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
412{
413 /*
414 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
415 * return with a special status code and make the execution loop deal with
416 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
417 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
418 * could continue w/o interruption, it probably will drop into the
419 * debugger, so it is not worth the effort of trying to service it here and we
420 * just lump it in with the handling of the others.
421 *
422 * To simplify the code and the register state management even more (wrt the
423 * immediate in the AND operation), we always update the flags and skip the
424 * extra check and its associated conditional jump.
425 */
426 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
427 <= UINT32_MAX);
428#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
429 AssertMsg( pReNative->idxCurCall == 0
430 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
431 IEMLIVENESSBIT_IDX_EFL_OTHER)),
432 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
433 IEMLIVENESSBIT_IDX_EFL_OTHER)));
434#endif
435
436 /*
437 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
438 * any pending register writes must be flushed.
439 */
440 off = iemNativeRegFlushPendingWrites(pReNative, off);
441
442 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
443 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER),
444 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER));
445 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_ReturnWithFlags>(pReNative, off, idxEflReg,
446 X86_EFL_TF
447 | CPUMCTX_DBG_HIT_DRX_MASK
448 | CPUMCTX_DBG_DBGF_MASK);
449 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
450 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
451
452 /* Free but don't flush the EFLAGS register. */
453 iemNativeRegFreeTmp(pReNative, idxEflReg);
454
455 return off;
456}
457
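/* C-level rendering of the sequence emitted above, for reading along; interpreter-style and
   simplified, since the real code exits the TB via the ReturnWithFlags label rather than
   returning a status. */
#if 0
static bool iemFinishInstructionFlagsCheckSketch(PVMCPUCC pVCpu)
{
    uint32_t const fEfl = pVCpu->cpum.GstCtx.eflags.u;
    if (fEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
        return true;                                    /* leave the TB and let the execution loop sort it out */
    pVCpu->cpum.GstCtx.eflags.u = fEfl & ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
    return false;                                       /* keep executing the TB */
}
#endif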
458
459/** Helper for iemNativeEmitFinishInstructionWithStatus. */
460DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
461{
462 unsigned const offOpcodes = pCallEntry->offOpcode;
463 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
464 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
465 {
466 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
467 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
468 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
469 }
470 AssertFailedReturn(NIL_RTGCPHYS);
471}
472
473
474/** The VINF_SUCCESS dummy. */
475template<int const a_rcNormal, bool const a_fIsJump>
476DECL_FORCE_INLINE_THROW(uint32_t)
477iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
478 int32_t const offJump)
479{
480 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
481 if (a_rcNormal != VINF_SUCCESS)
482 {
483#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
484 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
485#else
486 RT_NOREF_PV(pCallEntry);
487#endif
488
489 /* As this code returns from the TB any pending register writes must be flushed. */
490 off = iemNativeRegFlushPendingWrites(pReNative, off);
491
492 /*
493 * If we're in a conditional, mark the current branch as exiting so we
494 * can disregard its state when we hit the IEM_MC_ENDIF.
495 */
496 iemNativeMarkCurCondBranchAsExiting(pReNative);
497
498 /*
499 * Use the lookup table for getting to the next TB quickly.
500 * Note! In this code path there can only be one entry at present.
501 */
502 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
503 PCIEMTB const pTbOrg = pReNative->pTbOrg;
504 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
505 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
506
507#if 0
508 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
509 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
510 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
511 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
512 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
513
514 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
515
516#else
517 /* Load the index as argument #1 for the helper call at the given label. */
518 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
519
520 /*
521 * Figure out the physical address of the current instruction and see
522 * whether the next instruction we're about to execute is in the same
523 * page so we can optimistically skip TLB loading.
524 *
525 * - This is safe for all cases in FLAT mode.
526 * - In segmented modes it is complicated, given that a negative
527 * jump may underflow EIP and a forward jump may overflow or run into
528 * CS.LIM, triggering a #GP. The only thing we can get away with
529 * now at compile time is forward jumps w/o CS.LIM checks, since the
530 * lack of CS.LIM checks means we're good for the entire physical page
531 * we're executing on and another 15 bytes before we run into CS.LIM.
532 */
533 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
534# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
535 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
536# endif
537 )
538 {
539 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
540 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
541 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
542 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
543
544 {
545 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
546 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
547
548 /* Load the key lookup flags into the 2nd argument for the helper call.
549 - This is safe wrt CS limit checking since we're only here for FLAT modes.
550 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
551 interrupt shadow.
552 - The NMI inhibiting is more questionable, though... */
553 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
554 * Should we copy it into fExec to simplify this? OTOH, it's just a
555 * couple of extra instructions if EFLAGS are already in a register. */
556 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
557 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
558
559 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
560 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookup>(pReNative, off);
561 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithIrq>(pReNative, off);
562 }
563 }
564 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
565 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithTlb>(pReNative, off);
566 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq>(pReNative, off);
567#endif
568 }
569 return off;
570}
571
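/* Worked example of the compile-time same-page test above (all numbers made up;
   GUEST_PAGE_SHIFT is 12 and GUEST_PAGE_SIZE is 0x1000 for x86 guests). */
#if 0
    RTGCPHYS const GCPhysPcCurrent = UINT64_C(0x0010ffa0);                                  /* page 0x10f, offset 0xfa0 */
    RTGCPHYS const GCPhysPcNext    = GCPhysPcCurrent + 2 /*cbOpcode*/ + 0x30 /*offJump*/;   /* = 0x0010ffd2 */
    bool const fSamePage = (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT) /* 0x10f == 0x10f */
                        && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= 2;          /* 0x60 >= 2 */
    /* fSamePage is true here, so the cheaper ReturnBreakViaLookup[WithIrq] exit can be used. */
#endif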
572
573#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
574 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
575 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
576
577#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
578 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
579 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
580 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
581
582/** Same as iemRegAddToRip64AndFinishingNoFlags. */
583DECL_INLINE_THROW(uint32_t)
584iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
585{
586#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
587# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
588 if (!pReNative->Core.offPc)
589 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
590# endif
591
592 /* Allocate a temporary PC register. */
593 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
594
595 /* Perform the addition and store the result. */
596 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
597 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
598
599 /* Free but don't flush the PC register. */
600 iemNativeRegFreeTmp(pReNative, idxPcReg);
601#endif
602
603#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
604 pReNative->Core.offPc += cbInstr;
605 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
606# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
607 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 64);
608 off = iemNativeEmitPcDebugCheck(pReNative, off);
609# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
610 off = iemNativePcAdjustCheck(pReNative, off);
611# endif
612 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
613#endif
614
615 return off;
616}
617
618
619#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
620 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
621 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
622
623#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
624 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
625 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
626 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
627
628/** Same as iemRegAddToEip32AndFinishingNoFlags. */
629DECL_INLINE_THROW(uint32_t)
630iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
631{
632#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
633# ifdef IEMNATIVE_REG_FIXED_PC_DBG
634 if (!pReNative->Core.offPc)
635 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
636# endif
637
638 /* Allocate a temporary PC register. */
639 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
640
641 /* Perform the addition and store the result. */
642 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
643 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
644
645 /* Free but don't flush the PC register. */
646 iemNativeRegFreeTmp(pReNative, idxPcReg);
647#endif
648
649#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
650 pReNative->Core.offPc += cbInstr;
651 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
652# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
653 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 32);
654 off = iemNativeEmitPcDebugCheck(pReNative, off);
655# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
656 off = iemNativePcAdjustCheck(pReNative, off);
657# endif
658 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
659#endif
660
661 return off;
662}
663
664
665#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
666 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
667 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
668
669#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
670 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
671 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
672 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
673
674/** Same as iemRegAddToIp16AndFinishingNoFlags. */
675DECL_INLINE_THROW(uint32_t)
676iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
677{
678#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
679# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
680 if (!pReNative->Core.offPc)
681 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
682# endif
683
684 /* Allocate a temporary PC register. */
685 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
686
687 /* Perform the addition and store the result. */
688 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
689 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
690 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
691
692 /* Free but don't flush the PC register. */
693 iemNativeRegFreeTmp(pReNative, idxPcReg);
694#endif
695
696#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
697 pReNative->Core.offPc += cbInstr;
698 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
699# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
700 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 16);
701 off = iemNativeEmitPcDebugCheck(pReNative, off);
702# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
703 off = iemNativePcAdjustCheck(pReNative, off);
704# endif
705 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
706#endif
707
708 return off;
709}
710
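/* Interpreter-style summary of the three PC advance flavours implemented above
   (one independent line per flavour, not a sequence). */
#if 0
    pVCpu->cpum.GstCtx.rip = pVCpu->cpum.GstCtx.rip + cbInstr;                  /* PC64: full 64-bit add */
    pVCpu->cpum.GstCtx.rip = (uint32_t)(pVCpu->cpum.GstCtx.rip + cbInstr);      /* PC32: wraps at 32 bits */
    pVCpu->cpum.GstCtx.rip = (uint16_t)(pVCpu->cpum.GstCtx.rip + cbInstr);      /* PC16: wraps at 16 bits */
#endif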
711
712/*********************************************************************************************************************************
713* Common code for changing PC/RIP/EIP/IP. *
714*********************************************************************************************************************************/
715
716/**
717 * Emits code to check if the content of @a idxAddrReg is a canonical address,
718 * raising a \#GP(0) if it isn't.
719 *
720 * @returns New code buffer offset, UINT32_MAX on failure.
721 * @param pReNative The native recompile state.
722 * @param off The code buffer offset.
723 * @param idxAddrReg The host register with the address to check.
724 * @param idxInstr The current instruction.
725 */
726DECL_FORCE_INLINE_THROW(uint32_t)
727iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
728{
729 /*
730 * Make sure we don't have any outstanding guest register writes as we may
731 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
732 */
733 off = iemNativeRegFlushPendingWrites(pReNative, off);
734
735#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
736 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
737#else
738 RT_NOREF(idxInstr);
739#endif
740
741#ifdef RT_ARCH_AMD64
742 /*
743 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
744 * return raisexcpt();
745 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
746 */
747 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
748
749 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
750 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
751 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
752 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
753 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
754
755 iemNativeRegFreeTmp(pReNative, iTmpReg);
756
757#elif defined(RT_ARCH_ARM64)
758 /*
759 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
760 * return raisexcpt();
761 * ----
762 * mov x1, 0x800000000000
763 * add x1, x0, x1
764 * cmp xzr, x1, lsr 48
765 * b.ne .Lraisexcpt
766 */
767 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
768
769 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
770 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
771 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
772 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
773
774 iemNativeRegFreeTmp(pReNative, iTmpReg);
775
776#else
777# error "Port me"
778#endif
779 return off;
780}
781
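/* Plain C rendering of the canonicality test emitted above, with two worked values;
   canonical here means bits 63:47 are all identical (48-bit virtual addresses). */
#if 0
static bool iemIsCanonicalSketch(uint64_t uAddr)
{
    /* The +0x8000 trick folds the low canonical range (0 .. 0x00007FFFFFFFFFFF) and the
       high one (0xFFFF800000000000 .. 0xFFFFFFFFFFFFFFFF) into one check of the upper 32 bits. */
    return (((uint32_t)(uAddr >> 32) + UINT32_C(0x8000)) >> 16) == 0;
}
/* iemIsCanonicalSketch(UINT64_C(0x00007fffffffffff)) -> true
   iemIsCanonicalSketch(UINT64_C(0x0000800000000000)) -> false (would take the RaiseGp0 exit above) */
#endif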
782
783/**
784 * Emits code to check if the content of @a idxAddrReg is a canonical address,
785 * raising a \#GP(0) if it isn't.
786 *
787 * Caller makes sure everything is flushed, except maybe PC.
788 *
789 * @returns New code buffer offset, UINT32_MAX on failure.
790 * @param pReNative The native recompile state.
791 * @param off The code buffer offset.
792 * @param idxAddrReg The host register with the address to check.
793 * @param offDisp The relative displacement that has already been
794 * added to idxAddrReg and must be subtracted if
795 * raising a \#GP(0).
796 * @param idxInstr The current instruction.
797 */
798DECL_FORCE_INLINE_THROW(uint32_t)
799iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
800 uint8_t idxAddrReg, int64_t offDisp, uint8_t idxInstr)
801{
802#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
803 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
804#endif
805
806#ifdef RT_ARCH_AMD64
807 /*
808 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
809 * return raisexcpt();
810 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
811 */
812 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
813
814 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
815 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
816 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
817 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
818
819#elif defined(RT_ARCH_ARM64)
820 /*
821 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
822 * return raisexcpt();
823 * ----
824 * mov x1, 0x800000000000
825 * add x1, x0, x1
826 * cmp xzr, x1, lsr 48
827 * b.ne .Lraisexcpt
828 */
829 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
830
831 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
832 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
833 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
834#else
835# error "Port me"
836#endif
837
838 /* Jump to the #GP code (hoping static prediction considers forward branches as not-taken). */
839 uint32_t const offFixup1 = off;
840 off = iemNativeEmitJnzToFixed(pReNative, off, off /*8-bit jump suffices*/);
841
842 /* jump .Lnoexcept; Skip the #GP code. */
843 uint32_t const offFixup2 = off;
844 off = iemNativeEmitJmpToFixed(pReNative, off, off /*8-bit jump suffices*/);
845
846 /* .Lraisexcpt: */
847 iemNativeFixupFixedJump(pReNative, offFixup1, off);
848#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
849 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
850#else
851 RT_NOREF(idxInstr);
852#endif
853
854 /* Undo the PC adjustment and store the old PC value. */
855 off = iemNativeEmitSubGprImm(pReNative, off, idxAddrReg, offDisp, iTmpReg);
856 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxAddrReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
857
858 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
859
860 /* .Lnoexcept: */
861 iemNativeFixupFixedJump(pReNative, offFixup2, off);
862
863 iemNativeRegFreeTmp(pReNative, iTmpReg);
864 return off;
865}
866
867
868/**
869 * Emits code to check if the content of @a idxAddrReg is a canonical address,
870 * raising a \#GP(0) if it isn't.
871 *
872 * Caller makes sure everything is flushed, except maybe PC.
873 *
874 * @returns New code buffer offset, UINT32_MAX on failure.
875 * @param pReNative The native recompile state.
876 * @param off The code buffer offset.
877 * @param idxAddrReg The host register with the address to check.
878 * @param idxOldPcReg Register holding the old PC that offPc is relative
879 * to if available, otherwise UINT8_MAX.
880 * @param idxInstr The current instruction.
881 */
882DECL_FORCE_INLINE_THROW(uint32_t)
883iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
884 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
885{
886#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
887 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
888#endif
889
890#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
891# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
892 if (!pReNative->Core.offPc)
893# endif
894 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
895#else
896 RT_NOREF(idxInstr);
897#endif
898
899#ifdef RT_ARCH_AMD64
900 /*
901 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
902 * return raisexcpt();
903 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
904 */
905 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
906
907 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
908 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
909 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
910 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
911
912#elif defined(RT_ARCH_ARM64)
913 /*
914 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
915 * return raisexcpt();
916 * ----
917 * mov x1, 0x800000000000
918 * add x1, x0, x1
919 * cmp xzr, x1, lsr 48
920 * b.ne .Lraisexcpt
921 */
922 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
923
924 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
925 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
926 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
927#else
928# error "Port me"
929#endif
930
931#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
932 if (pReNative->Core.offPc)
933 {
934 /** @todo On x86, it is said that conditional jumps forward are statically
935 * predicted as not taken, so this isn't a very good construct.
936 * Investigate whether it makes sense to invert it and add another
937 * jump. Also, find out wtf the static predictor does here on arm! */
938 uint32_t const offFixup = off;
939 off = iemNativeEmitJzToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
940
941 /* .Lraisexcpt: */
942# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
943 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
944# endif
945 /* We need to update cpum.GstCtx.rip. */
946 if (idxOldPcReg == UINT8_MAX)
947 {
948 idxOldPcReg = iTmpReg;
949 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
950 }
951 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
952 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
953
954 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
955 iemNativeFixupFixedJump(pReNative, offFixup, off);
956 }
957 else
958#endif
959 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
960
961 iemNativeRegFreeTmp(pReNative, iTmpReg);
962
963 return off;
964}
965
966
967/**
968 * Emits code to check that the content of @a idxAddrReg is within the limit
969 * of CS, raising a \#GP(0) if it isn't.
970 *
971 * @returns New code buffer offset; throws VBox status code on error.
972 * @param pReNative The native recompile state.
973 * @param off The code buffer offset.
974 * @param idxAddrReg The host register (32-bit) with the address to
975 * check.
976 * @param idxInstr The current instruction.
977 */
978DECL_FORCE_INLINE_THROW(uint32_t)
979iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
980 uint8_t idxAddrReg, uint8_t idxInstr)
981{
982 /*
983 * Make sure we don't have any outstanding guest register writes as we may
984 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
985 */
986 off = iemNativeRegFlushPendingWrites(pReNative, off);
987
988#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
989 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
990#else
991 RT_NOREF(idxInstr);
992#endif
993
994 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
995 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
996 kIemNativeGstRegUse_ReadOnly);
997
998 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
999 off = iemNativeEmitTbExitJa<kIemNativeLabelType_RaiseGp0>(pReNative, off);
1000
1001 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1002 return off;
1003}
1004
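/* C-level rendering of the check emitted above; uNewEip is a placeholder for the value held
   in idxAddrReg, and the raise helper stands in for the RaiseGp0 TB exit. */
#if 0
    if ((uint32_t)uNewEip > pVCpu->cpum.GstCtx.cs.u32Limit)     /* unsigned 'above' -> #GP(0) */
        return iemRaiseGeneralProtectionFault0(pVCpu);
#endif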
1005
1006
1007
1008/**
1009 * Emits code to check that the content of @a idxAddrReg is within the limit
1010 * of CS, raising a \#GP(0) if it isn't.
1011 *
1012 * Caller makes sure everything is flushed, except maybe PC.
1013 *
1014 * @returns New code buffer offset; throws VBox status code on error.
1015 * @param pReNative The native recompile state.
1016 * @param off The code buffer offset.
1017 * @param idxAddrReg The host register (32-bit) with the address to
1018 * check.
1019 * @param idxOldPcReg Register holding the old PC that offPc is relative
1020 * to if available, otherwise UINT8_MAX.
1021 * @param idxInstr The current instruction.
1022 */
1023DECL_FORCE_INLINE_THROW(uint32_t)
1024iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1025 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
1026{
1027#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1028 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1029#endif
1030
1031#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1032# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1033 if (!pReNative->Core.offPc)
1034# endif
1035 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1036#else
1037 RT_NOREF(idxInstr);
1038#endif
1039
1040 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
1041 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
1042 kIemNativeGstRegUse_ReadOnly);
1043
1044 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
1045#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1046 if (pReNative->Core.offPc)
1047 {
1048 uint32_t const offFixup = off;
1049 off = iemNativeEmitJbeToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
1050
1051 /* Raising a GP(0), but first we need to update cpum.GstCtx.rip. */
1052 if (idxOldPcReg == UINT8_MAX)
1053 {
1054 idxOldPcReg = idxAddrReg;
1055 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1056 }
1057 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
1058 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1059# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1060 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1061# endif
1062 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
1063 iemNativeFixupFixedJump(pReNative, offFixup, off);
1064 }
1065 else
1066#endif
1067 off = iemNativeEmitTbExitJa<kIemNativeLabelType_RaiseGp0>(pReNative, off);
1068
1069 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1070 return off;
1071}
1072
1073
1074/*********************************************************************************************************************************
1075* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
1076*********************************************************************************************************************************/
1077
1078#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1079 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1080 (a_enmEffOpSize), pCallEntry->idxInstr); \
1081 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1082
1083#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1084 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1085 (a_enmEffOpSize), pCallEntry->idxInstr); \
1086 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1087 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1088
1089#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
1090 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1091 IEMMODE_16BIT, pCallEntry->idxInstr); \
1092 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1093
1094#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1095 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1096 IEMMODE_16BIT, pCallEntry->idxInstr); \
1097 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1098 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1099
1100#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
1101 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1102 IEMMODE_64BIT, pCallEntry->idxInstr); \
1103 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1104
1105#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1106 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1107 IEMMODE_64BIT, pCallEntry->idxInstr); \
1108 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1109 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1110
1111
1112#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1113 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1114 (a_enmEffOpSize), pCallEntry->idxInstr); \
1115 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1116
1117#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1118 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1119 (a_enmEffOpSize), pCallEntry->idxInstr); \
1120 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1121 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1122
1123#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG(a_i16, a_cbInstr, a_rcNormal) \
1124 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1125 IEMMODE_16BIT, pCallEntry->idxInstr); \
1126 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1127
1128#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1129 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1130 IEMMODE_16BIT, pCallEntry->idxInstr); \
1131 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1132 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1133
1134#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG(a_i32, a_cbInstr, a_rcNormal) \
1135 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1136 IEMMODE_64BIT, pCallEntry->idxInstr); \
1137 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1138
1139#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1140 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1141 IEMMODE_64BIT, pCallEntry->idxInstr); \
1142 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1143 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1144
1145/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
1146 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
1147 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
1148template<bool const a_fWithinPage>
1149DECL_INLINE_THROW(uint32_t)
1150iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1151 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1152{
1153 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
1154#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1155 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1156 if (a_fWithinPage && enmEffOpSize == IEMMODE_64BIT)
1157 {
1158 /* No #GP checking required, just update offPc and get on with it. */
1159 pReNative->Core.offPc += (int64_t)offDisp + cbInstr;
1160# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1161 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1162# endif
1163 }
1164 else
1165#endif
1166 {
1167 /* Flush all but PC iff we're doing a 64-bit update here and this isn't within a page. */
1168 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT && !a_fWithinPage))
1169 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1170
1171 /* Allocate a temporary PC register. */
1172 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1173 kIemNativeGstRegUse_ForUpdate);
1174
1175 /* Perform the addition. */
1176 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr + pReNative->Core.offPc);
1177
1178 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
1179 {
1180 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't.
1181 We can skip this if the target is within the same page. */
1182 if (!a_fWithinPage)
1183 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(pReNative, off, idxPcReg,
1184 (int64_t)offDisp + cbInstr, idxInstr);
1185 }
1186 else
1187 {
1188 /* Just truncate the result to 16-bit IP. */
1189 Assert(enmEffOpSize == IEMMODE_16BIT);
1190 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1191 }
1192
1193#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1194# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1195 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1196 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1197# endif
1198 /* Since we've already got the new PC value in idxPcReg, we can just as
1199 well write it out and reset offPc to zero. Otherwise, we'd need to use
1200 a copy of the shadow PC, which would cost another move instruction here. */
1201# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS)
1202 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
1203 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
1204 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
1205 Log4(("iemNativeEmitRip64RelativeJumpAndFinishingNoFlags: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u cCondDepth=%d\n",
1206 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped, pReNative->cCondDepth));
1207 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
1208# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1209 iemNativeDbgInfoAddNativeOffset(pReNative, off);
1210 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
1211# endif
1212# endif
1213 pReNative->Core.offPc = 0;
1214#endif
1215
1216 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1217
1218 /* Free but don't flush the PC register. */
1219 iemNativeRegFreeTmp(pReNative, idxPcReg);
1220 }
1221 return off;
1222}
1223
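/* Interpreter-style sketch of what the template above computes for a taken jump;
   IEM_IS_CANONICAL stands for the canonical check, and within-page jumps skip it. */
#if 0
    uint64_t uNewPc = pVCpu->cpum.GstCtx.rip + cbInstr + (int64_t)offDisp;
    if (enmEffOpSize == IEMMODE_16BIT)
        uNewPc = (uint16_t)uNewPc;                              /* truncate to IP */
    else if (!a_fWithinPage && !IEM_IS_CANONICAL(uNewPc))
        return iemRaiseGeneralProtectionFault0(pVCpu);          /* 64-bit target must stay canonical */
    pVCpu->cpum.GstCtx.rip = uNewPc;
#endif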
1224
1225#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1226 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1227 (a_enmEffOpSize), pCallEntry->idxInstr); \
1228 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1229
1230#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1231 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1232 (a_enmEffOpSize), pCallEntry->idxInstr); \
1233 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1234 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1235
1236#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
1237 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1238 IEMMODE_16BIT, pCallEntry->idxInstr); \
1239 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1240
1241#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1242 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1243 IEMMODE_16BIT, pCallEntry->idxInstr); \
1244 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1245 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1246
1247#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
1248 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1249 IEMMODE_32BIT, pCallEntry->idxInstr); \
1250 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1251
1252#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1253 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1254 IEMMODE_32BIT, pCallEntry->idxInstr); \
1255 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1256 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1257
1258
1259#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1260 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1261 (a_enmEffOpSize), pCallEntry->idxInstr); \
1262 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1263
1264#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1265 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1266 (a_enmEffOpSize), pCallEntry->idxInstr); \
1267 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1268 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1269
1270#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT(a_i16, a_cbInstr, a_rcNormal) \
1271 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1272 IEMMODE_16BIT, pCallEntry->idxInstr); \
1273 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1274
1275#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1276 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1277 IEMMODE_16BIT, pCallEntry->idxInstr); \
1278 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1279 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1280
1281#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT(a_i32, a_cbInstr, a_rcNormal) \
1282 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1283 IEMMODE_32BIT, pCallEntry->idxInstr); \
1284 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1285
1286#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1287 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1288 IEMMODE_32BIT, pCallEntry->idxInstr); \
1289 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1290 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1291
1292/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
1293 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
1294 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
1295template<bool const a_fFlat>
1296DECL_INLINE_THROW(uint32_t)
1297iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1298 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1299{
1300 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
1301#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1302 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1303#endif
1304
1305 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1306 if (!a_fFlat || enmEffOpSize == IEMMODE_16BIT)
1307 {
1308 off = iemNativeRegFlushPendingWrites(pReNative, off);
1309#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1310 Assert(pReNative->Core.offPc == 0);
1311#endif
1312 }
1313
1314 /* Allocate a temporary PC register. */
1315 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1316
1317 /* Perform the addition. */
1318    off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr + (int32_t)pReNative->Core.offPc);
1323
1324 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
1325 if (enmEffOpSize == IEMMODE_16BIT)
1326 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1327
1328    /* Perform limit checking, potentially raising #GP(0) and exiting the TB. */
1329 if (!a_fFlat)
1330 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1331
1332 /* Commit it. */
1333#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1334 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, enmEffOpSize == IEMMODE_32BIT ? 32 : 16);
1335 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1336#endif
1337
1338 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1339#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1340 pReNative->Core.offPc = 0;
1341#endif
1342
1343 /* Free but don't flush the PC register. */
1344 iemNativeRegFreeTmp(pReNative, idxPcReg);
1345
1346 return off;
1347}
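/* Worked example (hypothetical values): a 2-byte jz rel8 with offDisp=-5 at EIP=0x00001000
   and no pending offPc delta yields 0x00001000 + 2 - 5 = 0x00000FFD; with a 16-bit operand
   size the upper word is then cleared (IP=0x0FFD), and in non-flat mode the result is
   additionally checked against CS.LIM before being stored. */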
1348
1349
1350#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
1351 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1352 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1353
1354#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
1355 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1356 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1357 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1358
1359#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
1360 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1361 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1362
1363#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1364 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1365 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1366 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1367
1368#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
1369 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1370 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1371
1372#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1373 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1374 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1375 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1376
1377/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
1378DECL_INLINE_THROW(uint32_t)
1379iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1380 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
1381{
1382 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1383 off = iemNativeRegFlushPendingWrites(pReNative, off);
1384
1385#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1386 Assert(pReNative->Core.offPc == 0);
1387 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1388#endif
1389
1390 /* Allocate a temporary PC register. */
1391 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1392
1393 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
1394 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
1395 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1396 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1397#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1398 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, 16);
1399 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1400#endif
1401 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1402
1403 /* Free but don't flush the PC register. */
1404 iemNativeRegFreeTmp(pReNative, idxPcReg);
1405
1406 return off;
1407}
1408
1409
1410
1411/*********************************************************************************************************************************
1412* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH).                                 *
1413*********************************************************************************************************************************/
1414
1415/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
1416#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
1417 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1418
1419/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
1420#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
1421 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1422
1423/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
1424#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
1425 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1426
1427/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
1428 * clears flags. */
1429#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
1430 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
1431 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1432
1433/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
1434 * clears flags. */
1435#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
1436 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
1437 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1438
1439/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
1440 * clears flags. */
1441#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
1442 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
1443 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1444
1445#undef IEM_MC_SET_RIP_U16_AND_FINISH
1446
1447
1448/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
1449#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
1450 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1451
1452/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
1453#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
1454 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1455
1456/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
1457 * clears flags. */
1458#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
1459 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
1460 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1461
1462/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
1463 * and clears flags. */
1464#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
1465 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
1466 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1467
1468#undef IEM_MC_SET_RIP_U32_AND_FINISH
1469
1470
1471/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
1472#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
1473 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1474
1475/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
1476 * and clears flags. */
1477#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
1478 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
1479 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1480
1481#undef IEM_MC_SET_RIP_U64_AND_FINISH
1482
1483
1484/** Same as iemRegRipJumpU16AndFinishNoFlags,
1485 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
1486DECL_INLINE_THROW(uint32_t)
1487iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
1488 uint8_t idxInstr, uint8_t cbVar)
1489{
1490 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1491 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1492
1493    /* If we can't rule out a #GP(0) below, flush all dirty registers except for
1494       PC, which will be handled specially by the two workers below if they raise a #GP(0). */
1495 bool const fMayRaiseGp0 = (f64Bit && cbVar > sizeof(uint32_t)) || (!f64Bit && !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
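    /* In other words (a sketch of the two cases): a full 64-bit target may turn out to be
       non-canonical, and a 16/32-bit target in non-flat mode may exceed CS.LIM; flat 32-bit
       code and 16/32-bit values in 64-bit mode cannot fault here, so no pre-flush is needed
       for those. */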
1496 uint8_t const idxOldPcReg = fMayRaiseGp0
1497 ? iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc)
1498 : UINT8_MAX;
1499 if (fMayRaiseGp0)
1500 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1501
1502 /* Get a register with the new PC loaded from idxVarPc.
1503       Note! This ASSUMES that the high bits of the GPR are zeroed. */
1504 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
1505
1506 /* Check that the target is within CS.LIM / is canonical (may #GP(0) + exit TB). */
1507 if (fMayRaiseGp0)
1508 {
1509 if (f64Bit)
1510 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1511 else
1512 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1513 }
1514
1515 /* Store the result. */
1516 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1517
1518#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1519 pReNative->Core.offPc = 0;
1520 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1521# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1522 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1523 pReNative->Core.fDebugPcInitialized = true;
1524 Log4(("uPcUpdatingDebug=rip off=%#x\n", off));
1525# endif
1526#endif
1527
1528 if (idxOldPcReg != UINT8_MAX)
1529 iemNativeRegFreeTmp(pReNative, idxOldPcReg);
1530 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1531    /** @todo implicitly free the variable? */
1532
1533 return off;
1534}
1535
1536
1537
1538/*********************************************************************************************************************************
1539* Emitters for changing PC/RIP/EIP/IP with an indirect call (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters).      *
1540*********************************************************************************************************************************/
1541
1542/** @todo These helpers naturally belong to the stack push API, but we already need them up here (we could of course move
1543 * them below the stack emitters, but then they would no longer be close to the rest of the PC/RIP handling...). */
1544DECL_FORCE_INLINE_THROW(uint32_t)
1545iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1546{
1547 /* Use16BitSp: */
1548#ifdef RT_ARCH_AMD64
1549 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1550 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1551#else
1552 /* sub regeff, regrsp, #cbMem */
1553 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
1554 /* and regeff, regeff, #0xffff */
1555 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1556 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
1557    /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0, keeping the rest as is. */
1558 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
1559#endif
1560 return off;
1561}
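/* Hypothetical example: with RSP bits 15:0 = 0x0001 and cbMem=2, the 16-bit subtraction
   wraps to SP=0xFFFF, idxRegEffSp receives 0x0000FFFF, and bits 63:16 of idxRegRsp are
   left untouched on both the AMD64 and ARM64 paths. */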
1562
1563
1564DECL_FORCE_INLINE(uint32_t)
1565iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1566{
1567 /* Use32BitSp: */
1568 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
1569 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1570 return off;
1571}
1572
1573
1574DECL_INLINE_THROW(uint32_t)
1575iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
1576 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
1577{
1578 /*
1579 * Assert sanity.
1580 */
1581#ifdef VBOX_STRICT
1582 if (RT_BYTE2(cBitsVarAndFlat) != 0)
1583 {
1584 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
1585 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
1586 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
1587 Assert( pfnFunction
1588 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1589 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
1590 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1591 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
1592 : UINT64_C(0xc000b000a0009000) ));
1593 }
1594 else
1595 Assert( pfnFunction
1596 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
1597 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
1598 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
1599 : UINT64_C(0xc000b000a0009000) ));
1600#endif
1601
1602#ifdef VBOX_STRICT
1603 /*
1604 * Check that the fExec flags we've got make sense.
1605 */
1606 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1607#endif
1608
1609 /*
1610 * To keep things simple we have to commit any pending writes first as we
1611 * may end up making calls.
1612 */
1613 /** @todo we could postpone this till we make the call and reload the
1614 * registers after returning from the call. Not sure if that's sensible or
1615 * not, though. */
1616 off = iemNativeRegFlushPendingWrites(pReNative, off);
1617
1618 /*
1619 * First we calculate the new RSP and the effective stack pointer value.
1620 * For 64-bit mode and flat 32-bit these two are the same.
1621 * (Code structure is very similar to that of PUSH)
1622 */
1623 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
1624 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
1625 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
1626 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
1627 ? cbMem : sizeof(uint16_t);
1628 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
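    /* Decoding sketch (combination taken from the assertions above): e.g.
       RT_MAKE_U32_FROM_U8(16, 64, 0, 0) describes a 2-byte push (cbMem = 16/8) in flat
       64-bit mode (cBitsFlat = 64) that is not a segment-register push (byte 3 is zero),
       so cbMemAccess stays equal to cbMem. */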
1629 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1630 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1631 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1632 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1633 if (cBitsFlat != 0)
1634 {
1635 Assert(idxRegEffSp == idxRegRsp);
1636 Assert(cBitsFlat == 32 || cBitsFlat == 64);
1637 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1638 if (cBitsFlat == 64)
1639 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1640 else
1641 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1642 }
1643 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1644 {
1645 Assert(idxRegEffSp != idxRegRsp);
1646 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1647 kIemNativeGstRegUse_ReadOnly);
1648#ifdef RT_ARCH_AMD64
1649 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1650#else
1651 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1652#endif
1653 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1654 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1655 offFixupJumpToUseOtherBitSp = off;
1656 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1657 {
1658 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1659 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1660 }
1661 else
1662 {
1663 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1664 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1665 }
1666 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1667 }
1668 /* SpUpdateEnd: */
1669 uint32_t const offLabelSpUpdateEnd = off;
1670
1671 /*
1672 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1673 * we're skipping lookup).
1674 */
1675 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1676 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
1677 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1678 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1679 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1680 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1681 : UINT32_MAX;
1682 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1683
1684
1685 if (!TlbState.fSkip)
1686 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1687 else
1688 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1689
1690 /*
1691     * UseOtherBitSp (the SP width not handled by the inline path above):
1692 */
1693 if (cBitsFlat == 0)
1694 {
1695#ifdef RT_ARCH_AMD64
1696 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1697#else
1698 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1699#endif
1700 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1701 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1702 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1703 else
1704 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1705 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1706 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1707 }
1708
1709 /*
1710 * TlbMiss:
1711 *
1712 * Call helper to do the pushing.
1713 */
1714 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1715
1716#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1717 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1718#else
1719 RT_NOREF(idxInstr);
1720#endif
1721
1722 /* Save variables in volatile registers. */
1723 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1724 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1725 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1726 | (RT_BIT_32(idxRegPc));
1727 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1728
1729 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1730 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1731 {
1732 /* Swap them using ARG0 as temp register: */
1733 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1734 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1735 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1736 }
1737 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1738 {
1739 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1740 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1741
1742 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1743 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1744 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1745 }
1746 else
1747 {
1748 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1749 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1750
1751 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1752 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1753 }
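    /* Note on the ordering (a sketch of the hazard avoided above): if idxRegPc already
       lives in ARG1 and idxRegEffSp in ARG2, copying either one first would clobber the
       other, hence the three-way rotation through ARG0; ARG0 itself is only loaded with
       pVCpu after both arguments are in place. */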
1754
1755#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
1756 /* Do delayed EFLAGS calculations. */
1757 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
1758 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
1759#endif
1760
1761 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1762 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1763
1764 /* Done setting up parameters, make the call. */
1765 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
1766
1767 /* Restore variables and guest shadow registers to volatile registers. */
1768 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1769 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1770
1771#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1772 if (!TlbState.fSkip)
1773 {
1774 /* end of TlbMiss - Jump to the done label. */
1775 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1776 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1777
1778 /*
1779 * TlbLookup:
1780 */
1781 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
1782 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1783
1784 /*
1785 * Emit code to do the actual storing / fetching.
1786 */
1787 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1788# ifdef IEM_WITH_TLB_STATISTICS
1789 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1790 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1791# endif
1792 switch (cbMemAccess)
1793 {
1794 case 2:
1795 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1796 break;
1797 case 4:
1798 if (!fIsIntelSeg)
1799 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1800 else
1801 {
1802                    /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
1803                       PUSH FS in real mode, so we have to try to emulate that here.
1804                       We borrow the now unused idxReg1 from the TLB lookup code here. */
1805 uint8_t const idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
1806 kIemNativeGstReg_EFlags);
1807 if (idxRegEfl != UINT8_MAX)
1808 {
1809#ifdef RT_ARCH_AMD64
1810 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
1811 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1812 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1813#else
1814 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
1815 off, TlbState.idxReg1, idxRegEfl,
1816 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1817#endif
1818 iemNativeRegFreeTmp(pReNative, idxRegEfl);
1819 }
1820 else
1821 {
1822 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
1823 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
1824 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1825 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1826 }
1827 /* ASSUMES the upper half of idxRegPc is ZERO. */
1828 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
1829 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
1830 }
1831 break;
1832 case 8:
1833 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1834 break;
1835 default:
1836 AssertFailed();
1837 }
1838
1839 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1840 TlbState.freeRegsAndReleaseVars(pReNative);
1841
1842 /*
1843 * TlbDone:
1844 *
1845 * Commit the new RSP value.
1846 */
1847 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1848 }
1849#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1850
1851#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1852 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1853#endif
1854 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1855 if (idxRegEffSp != idxRegRsp)
1856 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1857
1858 return off;
1859}
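/* Typical call shape (taken from the call emitters further down): a 16-bit, non-flat
   return-address push is requested as
       off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg,
                                       RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
                                       (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
   i.e. byte 0 of cBitsVarAndFlat carries the value width in bits, byte 1 the flat-mode
   width (0 = not flat), and pfnFunction must match per the VBOX_STRICT assertions above. */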
1860
1861
1862/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1863#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1864 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1865
1866/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1867 * clears flags. */
1868#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1869 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1870 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1871
1872/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1873#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1874 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1875
1876/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1877 * clears flags. */
1878#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1879 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1880 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1881
1882#undef IEM_MC_IND_CALL_U16_AND_FINISH
1883
1884
1885/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1886#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1887 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1888
1889/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1890 * clears flags. */
1891#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1892 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1893 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1894
1895#undef IEM_MC_IND_CALL_U32_AND_FINISH
1896
1897
1898/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1899 * an extra parameter, for use in 64-bit code. */
1900#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1901 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1902
1903
1904/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1905 * an extra parameter, for use in 64-bit code and we need to check and clear
1906 * flags. */
1907#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1908 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1909 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1910
1911#undef IEM_MC_IND_CALL_U64_AND_FINISH
1912
1913/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1914 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1915DECL_INLINE_THROW(uint32_t)
1916iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1917 uint8_t idxInstr, uint8_t cbVar)
1918{
1919 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1920 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1921
1922 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1923 off = iemNativeRegFlushPendingWrites(pReNative, off);
1924
1925#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1926 Assert(pReNative->Core.offPc == 0);
1927 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1928#endif
1929
1930 /* Get a register with the new PC loaded from idxVarPc.
1931       Note! This ASSUMES that the high bits of the GPR are zeroed. */
1932 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1933
1934 /* Check limit (may #GP(0) + exit TB). */
1935 if (!f64Bit)
1936/** @todo we can skip this test in FLAT 32-bit mode. */
1937 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1938 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1939 else if (cbVar > sizeof(uint32_t))
1940 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1941
1942#if 1
1943 /* Allocate a temporary PC register, we don't want it shadowed. */
1944 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1945 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1946#else
1947 /* Allocate a temporary PC register. */
1948 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1949 true /*fNoVolatileRegs*/);
1950#endif
1951
1952 /* Perform the addition and push the variable to the guest stack. */
1953 /** @todo Flat variants for PC32 variants. */
1954 switch (cbVar)
1955 {
1956 case sizeof(uint16_t):
1957 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1958 /* Truncate the result to 16-bit IP. */
1959 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1960 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1961 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1962 break;
1963 case sizeof(uint32_t):
1964 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1965 /** @todo In FLAT mode we can use the flat variant. */
1966 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1967 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1968 break;
1969 case sizeof(uint64_t):
1970 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1971 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1972 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1973 break;
1974 default:
1975 AssertFailed();
1976 }
1977
1978    /* RSP got changed, so flush the pending writes again. */
1979 off = iemNativeRegFlushPendingWrites(pReNative, off);
1980
1981 /* Store the result. */
1982 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1983#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1984 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1985 pReNative->Core.fDebugPcInitialized = true;
1986 Log4(("uPcUpdatingDebug=rip/indirect-call off=%#x\n", off));
1987#endif
1988
1989#if 1
1990 /* Need to transfer the shadow information to the new RIP register. */
1991 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1992#else
1993 /* Sync the new PC. */
1994 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1995#endif
1996 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1997 iemNativeRegFreeTmp(pReNative, idxPcReg);
1998    /** @todo implicitly free the variable? */
1999
2000 return off;
2001}
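/* Sketch of the emitted sequence for a 64-bit 'call rax' (hypothetical guest code): the
   target value in idxNewPcReg is checked for canonicality, the return address RIP + cbInstr
   is pushed via iemNativeHlpStackFlatStoreU64, and only then does idxNewPcReg take over the
   RIP shadow and get written back to cpum.GstCtx.rip. */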
2002
2003
2004/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2005 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
2006#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
2007 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
2008
2009/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2010 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
2011 * flags. */
2012#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
2013 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
2014 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2015
2016/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2017 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2018#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
2019 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
2020
2021/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2022 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2023 * flags. */
2024#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
2025 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
2026 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2027
2028/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2029 * an extra parameter, for use in 64-bit code. */
2030#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
2031 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
2032
2033/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2034 * an extra parameter, for use in 64-bit code and we need to check and clear
2035 * flags. */
2036#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
2037 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
2038 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2039
2040#undef IEM_MC_REL_CALL_S16_AND_FINISH
2041
2042/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2043 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2044DECL_INLINE_THROW(uint32_t)
2045iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
2046 uint8_t idxInstr)
2047{
2048 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2049 off = iemNativeRegFlushPendingWrites(pReNative, off);
2050
2051#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2052 Assert(pReNative->Core.offPc == 0);
2053 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2054#endif
2055
2056 /* Allocate a temporary PC register. */
2057 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2058 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
2059 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2060
2061    /* Calculate the return address and, from it, the new IP. */
2062 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2063    /* Truncate the return address to 16-bit IP. */
2064 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
2065 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2066 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2067
2068    /* Truncate the new IP to 16 bits. */
2069 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
2070
2071 /* Check limit (may #GP(0) + exit TB). */
2072 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2073
2074    /* Push the return address onto the guest stack. */
2075 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
2076 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
2077
2078 /* RSP got changed, so flush again. */
2079 off = iemNativeRegFlushPendingWrites(pReNative, off);
2080
2081 /* Store the result. */
2082 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2083#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2084 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2085 pReNative->Core.fDebugPcInitialized = true;
2086 Log4(("uPcUpdatingDebug=rip/rel-call-16 off=%#x offDisp=%d\n", off, offDisp));
2087#endif
2088
2089 /* Need to transfer the shadow information to the new RIP register. */
2090 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2091 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2092 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2093
2094 return off;
2095}
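/* Hypothetical example: a 3-byte 16-bit 'call rel16' at IP=0xFFFE with offDisp=0x0010
   pushes the return address (0xFFFE + 3) & 0xffff = 0x0001 and continues at the new IP
   (0x0001 + 0x0010) & 0xffff = 0x0011, provided the new IP passes the CS.LIM check. */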
2096
2097
2098/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2099 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2100#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
2101 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
2102
2103/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2104 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2105 * flags. */
2106#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
2107 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
2108 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2109
2110#undef IEM_MC_REL_CALL_S32_AND_FINISH
2111
2112/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2113 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2114DECL_INLINE_THROW(uint32_t)
2115iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
2116 uint8_t idxInstr)
2117{
2118 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2119 off = iemNativeRegFlushPendingWrites(pReNative, off);
2120
2121#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2122 Assert(pReNative->Core.offPc == 0);
2123 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2124#endif
2125
2126 /* Allocate a temporary PC register. */
2127 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2128 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2129 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2130
2131 /* Update the EIP to get the return address. */
2132 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2133
2134 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2135 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2136 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2137 /** @todo we can skip this test in FLAT 32-bit mode. */
2138 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2139
2140    /* Push the return address onto the guest stack. */
2141 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
2142 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
2143 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
2144
2145    /* RSP got changed, so flush the pending writes again. */
2146 off = iemNativeRegFlushPendingWrites(pReNative, off);
2147
2148 /* Store the result. */
2149 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2150#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2151 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2152 pReNative->Core.fDebugPcInitialized = true;
2153 Log4(("uPcUpdatingDebug=eip/rel-call-32 off=%#x offDisp=%d\n", off, offDisp));
2154#endif
2155
2156 /* Need to transfer the shadow information to the new RIP register. */
2157 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2158 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2159 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2160
2161 return off;
2162}
2163
2164
2165/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2166 * an extra parameter, for use in 64-bit code. */
2167#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
2168 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
2169
2170/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2171 * an extra parameter, for use in 64-bit code and we need to check and clear
2172 * flags. */
2173#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
2174 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
2175 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2176
2177#undef IEM_MC_REL_CALL_S64_AND_FINISH
2178
2179/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2180 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2181DECL_INLINE_THROW(uint32_t)
2182iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
2183 uint8_t idxInstr)
2184{
2185 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2186 off = iemNativeRegFlushPendingWrites(pReNative, off);
2187
2188#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2189 Assert(pReNative->Core.offPc == 0);
2190 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2191#endif
2192
2193 /* Allocate a temporary PC register. */
2194 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2195 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2196 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2197
2198 /* Update the RIP to get the return address. */
2199 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
2200
2201 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2202 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
2203 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
2204 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2205
2206    /* Push the return address onto the guest stack. */
2207 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
2208 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
2209
2210    /* RSP got changed, so flush the pending writes again. */
2211 off = iemNativeRegFlushPendingWrites(pReNative, off);
2212
2213 /* Store the result. */
2214 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2215#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2216 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2217 pReNative->Core.fDebugPcInitialized = true;
2218    Log4(("uPcUpdatingDebug=rip/rel-call-64 off=%#x offDisp=%RI64\n", off, offDisp));
2219#endif
2220
2221 /* Need to transfer the shadow information to the new RIP register. */
2222 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2223 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2224 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2225
2226 return off;
2227}
2228
2229
2230/*********************************************************************************************************************************
2231* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters).       *
2232*********************************************************************************************************************************/
2233
2234DECL_FORCE_INLINE_THROW(uint32_t)
2235iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2236 uint16_t cbPopAdd, uint8_t idxRegTmp)
2237{
2238 /* Use16BitSp: */
2239#ifdef RT_ARCH_AMD64
2240 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2241 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
2242 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
2243 RT_NOREF(idxRegTmp);
2244
2245#elif defined(RT_ARCH_ARM64)
2246 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
2247 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
2248 /* add tmp, regrsp, #cbMem */
2249 uint16_t const cbCombined = cbMem + cbPopAdd;
2250 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
2251 if (cbCombined >= RT_BIT_32(12))
2252 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
2253 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2254 /* and tmp, tmp, #0xffff */
2255 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2256 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
2257    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
2258 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
2259
2260#else
2261# error "Port me"
2262#endif
2263 return off;
2264}
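/* Hypothetical example: for 'retn 4' with cbMem=2 and RSP bits 15:0 = 0xFFFC, idxRegEffSp
   becomes 0x0000FFFC (the address the return IP is read from) and RSP bits 15:0 wrap to
   (0xFFFC + 2 + 4) & 0xffff = 0x0002, with bits 63:16 preserved by the final
   partial-register update / BFI. */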
2265
2266
2267DECL_FORCE_INLINE_THROW(uint32_t)
2268iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2269 uint16_t cbPopAdd)
2270{
2271 /* Use32BitSp: */
2272 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2273 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
2274 return off;
2275}
2276
2277
2278/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
2279#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
2280 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
2281
2282/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
2283#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2284 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
2285
2286/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
2287#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2288 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
2289
2290/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
2291 * clears flags. */
2292#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
2293 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
2294 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2295
2296/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
2297 * clears flags. */
2298#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2299 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
2300 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2301
2302/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
2303 * clears flags. */
2304#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2305 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
2306 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2307
2308/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
2309DECL_INLINE_THROW(uint32_t)
2310iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
2311 IEMMODE enmEffOpSize, uint8_t idxInstr)
2312{
2313 RT_NOREF(cbInstr);
2314
2315#ifdef VBOX_STRICT
2316 /*
2317 * Check that the fExec flags we've got make sense.
2318 */
2319 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
2320#endif
2321
2322 /*
2323 * To keep things simple we have to commit any pending writes first as we
2324 * may end up making calls.
2325 */
2326 off = iemNativeRegFlushPendingWrites(pReNative, off);
2327
2328 /*
2329 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
2330 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
2331 * directly as the effective stack pointer.
2332 * (Code structure is very similar to that of PUSH)
2333 *
2334 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
2335 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
2336 * aren't commonly used (or useful) and thus not in need of optimizing.
2337 *
2338 * Note! For non-flat modes the guest RSP is not allocated for update but rather for calculation,
2339 * as the shadowed register would remain modified even if the return address throws a \#GP(0)
2340 * due to being outside the CS limit causing a wrong stack pointer value in the guest (see
2341 * the near return testcase in bs3-cpu-basic-2). If no exception is thrown the shadowing is transferred
2342 * to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
2343 */
2344 uint8_t const cbMem = enmEffOpSize == IEMMODE_64BIT
2345 ? sizeof(uint64_t)
2346 : enmEffOpSize == IEMMODE_32BIT
2347 ? sizeof(uint32_t)
2348 : sizeof(uint16_t);
2349 bool const fFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
2350 uintptr_t const pfnFunction = fFlat
2351 ? enmEffOpSize == IEMMODE_64BIT
2352 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
2353 : (uintptr_t)iemNativeHlpStackFlatFetchU32
2354 : enmEffOpSize == IEMMODE_32BIT
2355 ? (uintptr_t)iemNativeHlpStackFetchU32
2356 : (uintptr_t)iemNativeHlpStackFetchU16;
2357 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
2358 fFlat ? kIemNativeGstRegUse_ForUpdate
2359 : kIemNativeGstRegUse_Calculation,
2360 true /*fNoVolatileRegs*/);
2361 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
2362 /** @todo can do a better job picking the register here. For cbMem >= 4 this
2363 * will be the resulting register value. */
2364 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
2365
2366 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
2367 if (fFlat)
2368 Assert(idxRegEffSp == idxRegRsp);
2369 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
2370 {
2371 Assert(idxRegEffSp != idxRegRsp);
2372 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
2373 kIemNativeGstRegUse_ReadOnly);
2374#ifdef RT_ARCH_AMD64
2375 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2376#else
2377 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2378#endif
2379 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
2380 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
2381 offFixupJumpToUseOtherBitSp = off;
2382 if (enmEffOpSize == IEMMODE_32BIT)
2383 {
2384 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
2385 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
2386 }
2387 else
2388 {
2389 Assert(enmEffOpSize == IEMMODE_16BIT);
2390 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
2391 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
2392 idxRegMemResult);
2393 }
2394 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2395 }
2396 /* SpUpdateEnd: */
2397 uint32_t const offLabelSpUpdateEnd = off;
2398
2399 /*
2400 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
2401 * we're skipping lookup).
2402 */
2403 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
2404 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
2405 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
2406 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
2407 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
2408 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
2409 : UINT32_MAX;
2410
2411 if (!TlbState.fSkip)
2412 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
2413 else
2414 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
2415
2416 /*
2417 * Use16BitSp:
2418 */
2419 if (!fFlat)
2420 {
2421#ifdef RT_ARCH_AMD64
2422 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2423#else
2424 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2425#endif
2426 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
2427 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
2428 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
2429 idxRegMemResult);
2430 else
2431 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
2432 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
2433 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2434 }
2435
2436 /*
2437 * TlbMiss:
2438 *
2439 * Call helper to do the popping (stack fetch).
2440 */
2441 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
2442
2443#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2444 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2445#else
2446 RT_NOREF(idxInstr);
2447#endif
2448
2449 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
2450 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
2451 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
2452 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
2453
2454
2455 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
2456 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
2457 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
2458
2459#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2460 /* Do delayed EFLAGS calculations. */
2461 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
2462#endif
2463
2464 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
2465 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2466
2467 /* Done setting up parameters, make the call. */
2468 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
2469
2470 /* Move the return register content to idxRegMemResult. */
2471 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
2472 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
2473
2474 /* Restore variables and guest shadow registers to volatile registers. */
2475 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
2476 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
2477
2478#ifdef IEMNATIVE_WITH_TLB_LOOKUP
2479 if (!TlbState.fSkip)
2480 {
2481 /* end of TlbMiss - Jump to the done label. */
2482 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
2483 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
2484
2485 /*
2486 * TlbLookup:
2487 */
2488 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
2489 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
2490
2491 /*
2492 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
2493 */
2494 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2495# ifdef IEM_WITH_TLB_STATISTICS
2496 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
2497 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
2498# endif
2499 switch (cbMem)
2500 {
2501 case 2:
2502 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2503 break;
2504 case 4:
2505 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2506 break;
2507 case 8:
2508 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2509 break;
2510 default:
2511 AssertFailed();
2512 }
2513
2514 TlbState.freeRegsAndReleaseVars(pReNative);
2515
2516 /*
2517 * TlbDone:
2518 *
2519 * Set the new RSP value (FLAT accesses need to calculate it first) and
2520 * commit the popped register value.
2521 */
2522 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
2523 }
2524#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
2525
2526 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
2527 if (!f64Bit)
2528/** @todo we can skip this test in FLAT 32-bit mode. */
2529 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2530 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2531 else if (enmEffOpSize == IEMMODE_64BIT)
2532 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2533
2534 /* Complete RSP calculation for FLAT mode. */
2535 if (idxRegEffSp == idxRegRsp)
2536 {
2537 if (enmEffOpSize == IEMMODE_64BIT)
2538 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
2539 else
2540 {
2541 Assert(enmEffOpSize == IEMMODE_32BIT);
2542 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
2543 }
2544 }
2545
2546 /* Commit the result and clear any current guest shadows for RIP. */
2547 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
2548 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2549 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
2550#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2551 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2552 pReNative->Core.fDebugPcInitialized = true;
2553 Log4(("uPcUpdatingDebug=rip/ret off=%#x\n", off));
2554#endif
2555
2556 /* Need to transfer the shadowing information to the host register containing the updated value now. */
2557 if (!fFlat)
2558 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
2559
2560 iemNativeRegFreeTmp(pReNative, idxRegRsp);
2561 if (idxRegEffSp != idxRegRsp)
2562 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
2563 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
2564 return off;
2565}
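/*
 * For reference, an interpreter-style sketch of the guest-visible work the
 * code emitted above performs (illustrative only; ReadStack(), uEffSp, uRsp
 * and uRip are hypothetical stand-ins for the TLB lookup / stack-fetch helper
 * path and the guest context, and the 16-bit-SS details are left out):
 *
 *      uint64_t uNewRip = ReadStack(uEffSp, cbMem);    // pop the 2/4/8 byte return address
 *      uRsp += cbMem + cbPop;                          // skip it plus the RET imm16 bytes
 *      // 16/32-bit: uNewRip must be within the CS limit; 64-bit: it must be
 *      // canonical; otherwise #GP(0) is raised and nothing is committed.
 *      uRip  = uNewRip;                                // committed together with the new RSP
 */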
2566
2567
2568/*********************************************************************************************************************************
2569* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
2570*********************************************************************************************************************************/
2571
2572#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
2573 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2574
2575/**
2576 * Emits code to check if a \#NM exception should be raised.
2577 *
2578 * @returns New code buffer offset, UINT32_MAX on failure.
2579 * @param pReNative The native recompile state.
2580 * @param off The code buffer offset.
2581 * @param idxInstr The current instruction.
2582 */
2583DECL_INLINE_THROW(uint32_t)
2584iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2585{
2586#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2587 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
2588
2589 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
2590 {
2591#endif
2592 /*
2593 * Make sure we don't have any outstanding guest register writes as we may
2594 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2595 */
2596 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2597 off = iemNativeRegFlushPendingWrites(pReNative, off);
2598
2599#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2600 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2601#else
2602 RT_NOREF(idxInstr);
2603#endif
2604
2605 /* Allocate a temporary CR0 register. */
2606 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2607 kIemNativeGstRegUse_ReadOnly);
2608
2609 /*
2610 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
2611 * return raisexcpt();
2612 */
2613 /* Test and jump. */
2614 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseNm>(pReNative, off, idxCr0Reg,
2615 X86_CR0_EM | X86_CR0_TS);
2616
2617 /* Free but don't flush the CR0 register. */
2618 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2619
2620#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2621 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
2622 }
2623 else
2624 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
2625#endif
2626
2627 return off;
2628}
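/*
 * The emitted test corresponds to the architectural device-not-available
 * condition for x87 instructions; as a plain-C predicate (illustrative only,
 * uGuestCr0 stands in for the guest CR0 value):
 *
 *      // X86_CR0_EM is bit 2, X86_CR0_TS is bit 3.
 *      bool const fRaiseNm = (uGuestCr0 & (X86_CR0_EM | X86_CR0_TS)) != 0;
 */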
2629
2630
2631#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
2632 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2633
2634/**
2635 * Emits code to check if a \#NM exception should be raised for WAIT/FWAIT.
2636 *
2637 * @returns New code buffer offset, UINT32_MAX on failure.
2638 * @param pReNative The native recompile state.
2639 * @param off The code buffer offset.
2640 * @param idxInstr The current instruction.
2641 */
2642DECL_INLINE_THROW(uint32_t)
2643iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2644{
2645#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2646 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2647
2648 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2649 {
2650#endif
2651 /*
2652 * Make sure we don't have any outstanding guest register writes as we may
2653 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2654 */
2655 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2656 off = iemNativeRegFlushPendingWrites(pReNative, off);
2657
2658#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2659 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2660#else
2661 RT_NOREF(idxInstr);
2662#endif
2663
2664 /* Allocate a temporary CR0 register. */
2665 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2666 kIemNativeGstRegUse_Calculation);
2667
2668 /*
2669 * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
2670 * return raisexcpt();
2671 */
2672 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2673 /* Test and jump. */
2674 off = iemNativeEmitTbExitIfGpr32EqualsImm<kIemNativeLabelType_RaiseNm>(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2675
2676 /* Free the CR0 register. */
2677 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2678
2679#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2680 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2681 }
2682 else
2683 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2684#endif
2685
2686 return off;
2687}
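/*
 * For WAIT/FWAIT the architectural #NM condition requires both CR0.MP and
 * CR0.TS to be set, which is why the emitter masks first and then compares
 * against the combined value; as a plain-C predicate (illustrative only,
 * uGuestCr0 stands in for the guest CR0 value):
 *
 *      // X86_CR0_MP is bit 1, X86_CR0_TS is bit 3.
 *      bool const fRaiseNm = (uGuestCr0 & (X86_CR0_MP | X86_CR0_TS))
 *                         == (X86_CR0_MP | X86_CR0_TS);
 */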
2688
2689
2690#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2691 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2692
2693/**
2694 * Emits code to check if a \#MF exception should be raised.
2695 *
2696 * @returns New code buffer offset, UINT32_MAX on failure.
2697 * @param pReNative The native recompile state.
2698 * @param off The code buffer offset.
2699 * @param idxInstr The current instruction.
2700 */
2701DECL_INLINE_THROW(uint32_t)
2702iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2703{
2704 /*
2705 * Make sure we don't have any outstanding guest register writes as we may
2706 * raise an #MF and all guest registers must be up to date in CPUMCTX.
2707 */
2708 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2709 off = iemNativeRegFlushPendingWrites(pReNative, off);
2710
2711#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2712 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2713#else
2714 RT_NOREF(idxInstr);
2715#endif
2716
2717 /* Allocate a temporary FSW register. */
2718 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2719 kIemNativeGstRegUse_ReadOnly);
2720
2721 /*
2722 * if ((FSW & X86_FSW_ES) != 0)
2723 * return raisexcpt();
2724 */
2725 /* Test and jump. */
2726 off = iemNativeEmitTbExitIfBitSetInGpr<kIemNativeLabelType_RaiseMf>(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT);
2727
2728 /* Free but don't flush the FSW register. */
2729 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2730
2731 return off;
2732}
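/*
 * As a plain-C predicate (illustrative only, uGuestFsw stands in for the guest
 * FSW value), the emitted test checks the exception-summary bit in the x87
 * status word:
 *
 *      // X86_FSW_ES is bit 7 of the FPU status word.
 *      bool const fRaiseMf = (uGuestFsw & X86_FSW_ES) != 0;
 */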
2733
2734
2735#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2736 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2737
2738/**
2739 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2740 *
2741 * @returns New code buffer offset, UINT32_MAX on failure.
2742 * @param pReNative The native recompile state.
2743 * @param off The code buffer offset.
2744 * @param idxInstr The current instruction.
2745 */
2746DECL_INLINE_THROW(uint32_t)
2747iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2748{
2749#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2750 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2751
2752 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2753 {
2754#endif
2755 /*
2756 * Make sure we don't have any outstanding guest register writes as we may
2757 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2758 */
2759 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2760 off = iemNativeRegFlushPendingWrites(pReNative, off);
2761
2762#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2763 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2764#else
2765 RT_NOREF(idxInstr);
2766#endif
2767
2768 /* Allocate a temporary CR0 and CR4 register. */
2769 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2770 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2771 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2772
2773 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2774#ifdef RT_ARCH_AMD64
2775 /*
2776 * We do a modified test here:
2777 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
2778 * else { goto RaiseSseRelated; }
2779 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2780 * all targets except the 386, and since the 386 doesn't support SSE
2781 * anyway, this should be a safe assumption.
2782 */
2783 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2784 1+6+3+3+7+7+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2785 //pCodeBuf[off++] = 0xcc;
2786 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2787 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2788 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2789 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2790 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2791 off = iemNativeEmitTbExitJccEx<kIemNativeLabelType_RaiseSseRelated>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
2792
2793#elif defined(RT_ARCH_ARM64)
2794 /*
2795 * We do a modified test here:
2796 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2797 * else { goto RaiseSseRelated; }
2798 */
2799 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2800 1+5 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2801 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2802 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2803 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2804 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2805 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2806 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2807 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2808 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2809 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_RaiseSseRelated>(pReNative, pCodeBuf, off,
2810 idxTmpReg, false /*f64Bit*/);
2811
2812#else
2813# error "Port me!"
2814#endif
2815
2816 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2817 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2818 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2819 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2820
2821#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2822 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2823 }
2824 else
2825 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2826#endif
2827
2828 return off;
2829}
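/*
 * Worked example of the folded SSE check above (illustrative only; uGuestCr0
 * and uGuestCr4 stand in for the guest control register values).  The
 * architectural conditions are CR4.OSFXSR=0 or CR0.EM=1 => #UD and CR0.TS=1
 * => #NM; packing the three bits into one value lets the happy path be tested
 * with a single compare against X86_CR4_OSFXSR:
 *
 *      // X86_CR0_EM = bit 2 (0x04), X86_CR0_TS = bit 3 (0x08), X86_CR4_OSFXSR = bit 9 (0x200).
 *      uint32_t uTmp = (uGuestCr4 & X86_CR4_OSFXSR) | uGuestCr0;
 *      uTmp &= X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR;
 *      bool const fRaise = (uTmp ^ X86_CR4_OSFXSR) != 0;
 *
 *      // All good (OSFXSR=1, EM=0, TS=0): uTmp = 0x200, 0x200 ^ 0x200 = 0 -> continue.
 *      // E.g. TS=1:                       uTmp = 0x208, 0x208 ^ 0x200 = 8 -> RaiseSseRelated.
 */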
2830
2831
2832#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2833 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2834
2835/**
2836 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2837 *
2838 * @returns New code buffer offset, UINT32_MAX on failure.
2839 * @param pReNative The native recompile state.
2840 * @param off The code buffer offset.
2841 * @param idxInstr The current instruction.
2842 */
2843DECL_INLINE_THROW(uint32_t)
2844iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2845{
2846#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2847 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2848
2849 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2850 {
2851#endif
2852 /*
2853 * Make sure we don't have any outstanding guest register writes as we may
2854 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2855 */
2856 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2857 off = iemNativeRegFlushPendingWrites(pReNative, off);
2858
2859#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2860 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2861#else
2862 RT_NOREF(idxInstr);
2863#endif
2864
2865 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2866 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2867 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2868 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2869 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2870
2871 /*
2872 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2873 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2874 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2875 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2876 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2877 * { likely }
2878 * else { goto RaiseAvxRelated; }
2879 */
2880#ifdef RT_ARCH_AMD64
2881 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2882 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
2883 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2884 ^ 0x1a) ) { likely }
2885 else { goto RaiseAvxRelated; } */
2886 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2887 1+6+3+5+3+5+3+7+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2888 //pCodeBuf[off++] = 0xcc;
2889 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2890 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2891 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2892 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2893 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2894 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2895 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2896 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2897 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2898 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2899 off = iemNativeEmitTbExitJccEx<kIemNativeLabelType_RaiseAvxRelated>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
2900
2901#elif defined(RT_ARCH_ARM64)
2902 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
2903 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2904 else { goto RaiseAvxRelated; } */
2905 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2906 1+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2907 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2908 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2909 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2910 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2911 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2912 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2913 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2914 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2915 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2916 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2917 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2918 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_RaiseAvxRelated>(pReNative, pCodeBuf, off,
2919 idxTmpReg, false /*f64Bit*/);
2920
2921#else
2922# error "Port me!"
2923#endif
2924
2925 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2926 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2927 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2928 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2929#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2930 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2931 }
2932 else
2933 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2934#endif
2935
2936 return off;
2937}
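/*
 * Worked example of the folded AVX check above (illustrative only; uGuestXcr0,
 * uGuestCr4 and uGuestCr0 stand in for the guest register values).  The happy
 * path needs XCR0.SSE=1, XCR0.YMM=1, CR4.OSXSAVE=1 and CR0.TS=0; the AMD64
 * variant packs those four bits into the low bits of one register so a single
 * XOR against 0x1a tests them all:
 *
 *      // XSAVE_C_SSE = bit 1, XSAVE_C_YMM = bit 2, X86_CR4_OSXSAVE_BIT = 18, X86_CR0_TS_BIT = 3.
 *      uint32_t uPacked = (uint32_t)((uGuestXcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)  // -> bits 3 & 4
 *                       | (((uGuestCr4 >> X86_CR4_OSXSAVE_BIT) & 1) << 1)              // -> bit 1
 *                       | ((uGuestCr0 >> X86_CR0_TS_BIT) & 1);                         // -> bit 0
 *      bool const fRaise = (uPacked ^ 0x1a) != 0;          // 0x1a == 0b11010
 *
 *      // All good: uPacked = 0b11010 = 0x1a -> XOR gives 0 -> continue.
 *      // E.g. CR4.OSXSAVE=0: uPacked = 0x18 -> XOR gives 2 -> RaiseAvxRelated.
 */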
2938
2939
2940#define IEM_MC_RAISE_DIVIDE_ERROR_IF_LOCAL_IS_ZERO(a_uVar) \
2941 off = iemNativeEmitRaiseDivideErrorIfLocalIsZero(pReNative, off, a_uVar, pCallEntry->idxInstr)
2942
2943/**
2944 * Emits code to raise a \#DE if a local variable is zero.
2945 *
2946 * @returns New code buffer offset, UINT32_MAX on failure.
2947 * @param pReNative The native recompile state.
2948 * @param off The code buffer offset.
2949 * @param idxVar The variable to check. This must be 32-bit.
2950 * @param idxInstr The current instruction.
2951 */
2952DECL_INLINE_THROW(uint32_t)
2953iemNativeEmitRaiseDivideErrorIfLocalIsZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxInstr)
2954{
2955 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2956 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, sizeof(uint32_t));
2957
2958 /* Make sure we don't have any outstanding guest register writes as we may raise a #DE. */
2959 off = iemNativeRegFlushPendingWrites(pReNative, off);
2960
2961 /* Set the instruction number if we're counting. */
2962#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2963 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2964#else
2965 RT_NOREF(idxInstr);
2966#endif
2967
2968 /* Do the job we're here for. */
2969 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off);
2970 off = iemNativeEmitTbExitIfGprIsZero<kIemNativeLabelType_RaiseDe>(pReNative, off, idxVarReg, false /*f64Bit*/);
2971 iemNativeVarRegisterRelease(pReNative, idxVar);
2972
2973 return off;
2974}
2975
2976
2977#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2978 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2979
2980/**
2981 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2982 *
2983 * @returns New code buffer offset, UINT32_MAX on failure.
2984 * @param pReNative The native recompile state.
2985 * @param off The code buffer offset.
2986 * @param idxInstr The current instruction.
2987 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2988 * @param cbAlign The alignment in bytes to check against.
2989 */
2990DECL_INLINE_THROW(uint32_t)
2991iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2992 uint8_t idxVarEffAddr, uint8_t cbAlign)
2993{
2994 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2995 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2996
2997 /*
2998 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2999 */
3000 off = iemNativeRegFlushPendingWrites(pReNative, off);
3001
3002#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3003 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
3004#else
3005 RT_NOREF(idxInstr);
3006#endif
3007
3008 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
3009 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseGp0>(pReNative, off, idxVarReg, cbAlign - 1);
3010 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
3011
3012 return off;
3013}
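/*
 * The test relies on cbAlign being a power of two so that (cbAlign - 1) masks
 * out exactly the misaligned low address bits; as a plain-C sketch
 * (illustrative only, GCPtrEff stands in for the effective address):
 *
 *      // e.g. cbAlign = 16: any of the low four address bits set -> #GP(0).
 *      bool const fRaiseGp0 = (GCPtrEff & (cbAlign - 1)) != 0;
 */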
3014
3015
3016/*********************************************************************************************************************************
3017* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
3018*********************************************************************************************************************************/
3019
3020/**
3021 * Pushes an IEM_MC_IF_XXX onto the condition stack.
3022 *
3023 * @returns Pointer to the condition stack entry on success, NULL on failure
3024 * (too many nestings)
3025 */
3026DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
3027{
3028 uint32_t const idxStack = pReNative->cCondDepth;
3029 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
3030
3031 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
3032 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
3033
3034 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
3035 pEntry->fInElse = false;
3036 pEntry->fIfExitTb = false;
3037 pEntry->fElseExitTb = false;
3038 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
3039 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
3040
3041 return pEntry;
3042}
3043
3044
3045/**
3046 * Start of the if-block, snapshotting the register and variable state.
3047 */
3048DECL_INLINE_THROW(void)
3049iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
3050{
3051 Assert(offIfBlock != UINT32_MAX);
3052 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3053 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3054 Assert(!pEntry->fInElse);
3055
3056 /* Define the start of the IF block if requested or for disassembly purposes. */
3057 if (idxLabelIf != UINT32_MAX)
3058 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
3059#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3060 else
3061 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
3062#else
3063 RT_NOREF(offIfBlock);
3064#endif
3065
3066 /* Copy the initial state so we can restore it in the 'else' block. */
3067 pEntry->InitialState = pReNative->Core;
3068}
3069
3070
3071#define IEM_MC_ELSE() } while (0); \
3072 off = iemNativeEmitElse(pReNative, off); \
3073 do {
3074
3075/** Emits code related to IEM_MC_ELSE. */
3076DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3077{
3078 /* Check sanity and get the conditional stack entry. */
3079 Assert(off != UINT32_MAX);
3080 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3081 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3082 Assert(!pEntry->fInElse);
3083
3084 /* We can skip the dirty register flushing and the jump to the endif if
3085 the if-branch already jumped to a TB exit. */
3086 if (!pEntry->fIfExitTb)
3087 {
3088#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) && 0
3089 /* Writeback any dirty shadow registers. */
3090 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
3091 * in one of the branches and leave guest registers already dirty before the start of the if
3092 * block alone. */
3093 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3094#endif
3095
3096 /* Jump to the endif. */
3097 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
3098 }
3099# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3100 else
3101 Assert(pReNative->Core.offPc == 0);
3102# endif
3103
3104 /* Define the else label and enter the else part of the condition. */
3105 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3106 pEntry->fInElse = true;
3107
3108 /* Snapshot the core state so we can do a merge at the endif and restore
3109 the snapshot we took at the start of the if-block. */
3110 pEntry->IfFinalState = pReNative->Core;
3111 pReNative->Core = pEntry->InitialState;
3112
3113 return off;
3114}
3115
3116
3117#define IEM_MC_ENDIF() } while (0); \
3118 off = iemNativeEmitEndIf(pReNative, off)
3119
3120/** Emits code related to IEM_MC_ENDIF. */
3121DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3122{
3123 /* Check sanity and get the conditional stack entry. */
3124 Assert(off != UINT32_MAX);
3125 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3126 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3127
3128#if defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) && 0
3129 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3130#endif
3131
3132 /*
3133 * If either of the branches exited the TB, we can take the state from the
3134 * other branch and skip all the merging headache.
3135 */
3136 bool fDefinedLabels = false;
3137 if (pEntry->fElseExitTb || pEntry->fIfExitTb)
3138 {
3139#ifdef VBOX_STRICT
3140 Assert(pReNative->cCondDepth == 1); /* Assuming this only happens in simple conditional structures. */
3141 Assert(pEntry->fElseExitTb != pEntry->fIfExitTb); /* Assuming we don't have any code where both branches exit. */
3142 PCIEMNATIVECORESTATE const pExitCoreState = pEntry->fIfExitTb && pEntry->fInElse
3143 ? &pEntry->IfFinalState : &pReNative->Core;
3144# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3145 Assert(pExitCoreState->bmGstRegShadowDirty == 0);
3146# endif
3147# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3148 Assert(pExitCoreState->offPc == 0);
3149# endif
3150 RT_NOREF(pExitCoreState);
3151#endif
3152
3153 if (!pEntry->fIfExitTb)
3154 {
3155 Assert(pEntry->fInElse);
3156 pReNative->Core = pEntry->IfFinalState;
3157 }
3158 }
3159 else
3160 {
3161 /*
3162 * Now we have to find common ground with the core state at the end of the
3163 * if-block. Use the smallest common denominator and just drop anything
3164 * that isn't the same in both states.
3165 */
3166 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
3167 * which is why we're doing this at the end of the else-block.
3168 * But we'd need more info about the future for that to be worth the effort. */
3169 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
3170#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3171 AssertMsgStmt(pReNative->Core.offPc == pOther->offPc,
3172 ("Core.offPc=%#RX64 pOther->offPc=%#RX64\n", pReNative->Core.offPc, pOther->offPc),
3173 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3174#endif
3175
3176 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
3177 {
3178#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3179 /*
3180 * If the branch has differences in dirty shadow registers, we will flush
3181 * the register only dirty in the current branch and dirty any that's only
3182 * dirty in the other one.
3183 */
3184 uint64_t const fGstRegDirtyOther = pOther->bmGstRegShadowDirty;
3185 uint64_t const fGstRegDirtyThis = pReNative->Core.bmGstRegShadowDirty;
3186 uint64_t const fGstRegDirtyDiff = fGstRegDirtyOther ^ fGstRegDirtyThis;
3187 uint64_t const fGstRegDirtyHead = fGstRegDirtyThis & fGstRegDirtyDiff;
3188 uint64_t fGstRegDirtyTail = fGstRegDirtyOther & fGstRegDirtyDiff;
3189 if (!fGstRegDirtyDiff)
3190 { /* likely */ }
3191 else
3192 {
3193 //uint64_t const fGstRegDirtyHead = pReNative->Core.bmGstRegShadowDirty & fGstRegDirtyDiff;
3194 if (fGstRegDirtyHead)
3195 {
3196 Log12(("iemNativeEmitEndIf: flushing dirty guest registers in current branch: %RX64\n", fGstRegDirtyHead));
3197 off = iemNativeRegFlushDirtyGuest(pReNative, off, fGstRegDirtyHead);
3198 }
3199 }
3200#endif
3201
3202 /*
3203 * Shadowed guest registers.
3204 *
3205 * We drop any shadows where the two states disagree about where
3206 * things are kept. We may end up flushing more dirty registers
3207 * here, if the two branches keep things in different registers.
3208 */
3209 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
3210 if (fGstRegs)
3211 {
3212 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
3213 do
3214 {
3215 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3216 fGstRegs &= ~RT_BIT_64(idxGstReg);
3217
3218 uint8_t const idxCurHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3219 uint8_t const idxOtherHstReg = pOther->aidxGstRegShadows[idxGstReg];
3220 if ( idxCurHstReg != idxOtherHstReg
3221 || !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg)))
3222 {
3223#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3224 Log12(("iemNativeEmitEndIf: dropping gst %s (%d) from hst %s (other %d/%#RX64)\n",
3225 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3226 idxOtherHstReg, pOther->bmGstRegShadows));
3227#else
3228 Log12(("iemNativeEmitEndIf: dropping %s gst %s (%d) from hst %s (other %d/%#RX64/%s)\n",
3229 pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "_dirty_" : "clean",
3230 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3231 idxOtherHstReg, pOther->bmGstRegShadows,
3232 pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "dirty" : "clean"));
3233 if (pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3234 fGstRegDirtyTail |= RT_BIT_64(idxGstReg);
3235 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3236 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3237#endif
3238 iemNativeRegClearGstRegShadowingOne(pReNative, idxCurHstReg, (IEMNATIVEGSTREG)idxGstReg, off);
3239 }
3240 } while (fGstRegs);
3241 }
3242 else
3243 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
3244
3245#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3246 /*
3247 * Generate jumpy code for flushing dirty registers from the other
3248 * branch that aren't dirty in the current one.
3249 */
3250 if (!fGstRegDirtyTail)
3251 { /* likely */ }
3252 else
3253 {
3254 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEndIfOtherBranchDirty);
3255 Log12(("iemNativeEmitEndIf: Dirty register only in the other branch: %#RX64 - BAD!\n", fGstRegDirtyTail));
3256
3257 /* First the current branch has to jump over the dirty flushing from the other branch. */
3258 uint32_t const offFixup1 = off;
3259 off = iemNativeEmitJmpToFixed(pReNative, off, off + 10);
3260
3261 /* Put the endif and maybe else label here so the other branch ends up here. */
3262 if (!pEntry->fInElse)
3263 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3264 else
3265 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3266 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3267 fDefinedLabels = true;
3268
3269 /* Flush the dirty guest registers from the other branch. */
3270 while (fGstRegDirtyTail)
3271 {
3272 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegDirtyTail) - 1;
3273 fGstRegDirtyTail &= ~RT_BIT_64(idxGstReg);
3274 Log12(("iemNativeEmitEndIf: tail flushing %s (%d) from other branch %d (cur %d/%#RX64)\n",
3275 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, pOther->aidxGstRegShadows[idxGstReg],
3276 pReNative->Core.aidxGstRegShadows[idxGstReg], pReNative->Core.bmGstRegShadows));
3277
3278 off = iemNativeRegFlushPendingWriteEx(pReNative, off, (PIEMNATIVECORESTATE)pOther, (IEMNATIVEGSTREG)idxGstReg);
3279
3280 /* Mismatching shadowing should've been dropped in the previous step already. */
3281 Assert( !(pReNative->Core.bmGstRegShadows & RT_BIT_64(idxGstReg))
3282 || pReNative->Core.aidxGstRegShadows[idxGstReg] == pOther->aidxGstRegShadows[idxGstReg]);
3283 }
3284
3285 /* Here is the actual endif label, fixup the above jump to land here. */
3286 iemNativeFixupFixedJump(pReNative, offFixup1, off);
3287 }
3288#endif
3289
3290 /*
3291 * Check variables next. For now we must require them to be identical
3292 * or stuff we can recreate. (No code is emitted here.)
3293 */
3294 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
3295#ifdef VBOX_STRICT
3296 uint32_t const offAssert = off;
3297#endif
3298 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
3299 if (fVars)
3300 {
3301 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
3302 do
3303 {
3304 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
3305 fVars &= ~RT_BIT_32(idxVar);
3306
3307 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
3308 {
3309 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
3310 continue;
3311 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3312 {
3313 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3314 if (idxHstReg != UINT8_MAX)
3315 {
3316 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3317 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3318 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
3319 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3320 }
3321 continue;
3322 }
3323 }
3324 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
3325 continue;
3326
3327 /* Irreconcilable, so drop it. */
3328 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3329 if (idxHstReg != UINT8_MAX)
3330 {
3331 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3332 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3333 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
3334 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3335 }
3336 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3337 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
3338 } while (fVars);
3339 }
3340 Assert(off == offAssert);
3341
3342 /*
3343 * Finally, check that the host register allocations matches.
3344 */
3345 AssertMsgStmt((pReNative->Core.bmHstRegs & (pReNative->Core.bmHstRegs ^ pOther->bmHstRegs)) == 0,
3346 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
3347 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
3348 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3349 }
3350 }
3351
3352 /*
3353 * Define the endif label and maybe the else one if we're still in the 'if' part.
3354 */
3355 if (!fDefinedLabels)
3356 {
3357 if (!pEntry->fInElse)
3358 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3359 else
3360 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3361 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3362 }
3363
3364 /* Pop the conditional stack. */
3365 pReNative->cCondDepth -= 1;
3366
3367 return off;
3368}
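/*
 * To illustrate how the pieces above fit together (sketch only, not taken from
 * a real instruction body), an MC block like
 *
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *          ... emit the then-branch ...
 *      } IEM_MC_ELSE() {
 *          ... emit the else-branch ...
 *      } IEM_MC_ENDIF();
 *
 * expands so that the IF emitter pushes a condition stack entry and emits the
 * conditional jump to the Else label, iemNativeEmitElse() emits the jump to
 * the EndIf label, defines Else and restores the register/variable snapshot
 * taken at the start of the if-block, and iemNativeEmitEndIf() reconciles the
 * two states and defines the EndIf label.  The do { } while (0) fragments in
 * the macros merely keep the braces balanced.
 */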
3369
3370
3371/**
3372 * Helper function to convert X86_EFL_xxx masks to liveness masks.
3373 *
3374 * The compiler should be able to figure this out at compile time, so sprinkling
3375 * constexpr wherever possible here to nudge it along.
3376 */
3377template<uint32_t const a_fEfl>
3378RT_CONSTEXPR uint64_t iemNativeEflagsToLivenessMask(void)
3379{
3380 return (a_fEfl & ~X86_EFL_STATUS_BITS ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER) : 0)
3381 | (a_fEfl & X86_EFL_CF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF) : 0)
3382 | (a_fEfl & X86_EFL_PF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_PF) : 0)
3383 | (a_fEfl & X86_EFL_AF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_AF) : 0)
3384 | (a_fEfl & X86_EFL_ZF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_ZF) : 0)
3385 | (a_fEfl & X86_EFL_SF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_SF) : 0)
3386 | (a_fEfl & X86_EFL_OF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OF) : 0);
3387}
3388
3389
3390/**
3391 * Helper function to convert a single X86_EFL_xxxx value to bit number.
3392 *
3393 * The compiler should be able to figure this out at compile time, so sprinkling
3394 * constexpr wherever possible here to nudge it along.
3395 */
3396template<uint32_t const a_fEfl>
3397RT_CONSTEXPR unsigned iemNativeEflagsToSingleBitNo(void)
3398{
3399 AssertCompile( a_fEfl == X86_EFL_CF
3400 || a_fEfl == X86_EFL_PF
3401 || a_fEfl == X86_EFL_AF
3402 || a_fEfl == X86_EFL_ZF
3403 || a_fEfl == X86_EFL_SF
3404 || a_fEfl == X86_EFL_OF
3405 || a_fEfl == X86_EFL_DF);
3406 return a_fEfl == X86_EFL_CF ? X86_EFL_CF_BIT
3407 : a_fEfl == X86_EFL_PF ? X86_EFL_PF_BIT
3408 : a_fEfl == X86_EFL_AF ? X86_EFL_AF_BIT
3409 : a_fEfl == X86_EFL_ZF ? X86_EFL_ZF_BIT
3410 : a_fEfl == X86_EFL_SF ? X86_EFL_SF_BIT
3411 : a_fEfl == X86_EFL_OF ? X86_EFL_OF_BIT
3412 : X86_EFL_DF_BIT;
3413}
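/*
 * Usage sketch for the two helpers above (illustrative only): the IEM_MC_IF_*
 * macros below instantiate them with compile-time EFLAGS masks, e.g.
 *
 *      iemNativeEflagsToLivenessMask<X86_EFL_CF | X86_EFL_ZF>()
 *          == RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF) | RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_ZF)
 *      iemNativeEflagsToSingleBitNo<X86_EFL_ZF>()
 *          == X86_EFL_ZF_BIT (i.e. 6)
 *
 * and both fold to constants at compile time thanks to RT_CONSTEXPR.
 */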
3414
3415
3416#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
3417 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits), iemNativeEflagsToLivenessMask<a_fBits>()); \
3418 do {
3419
3420/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
3421DECL_INLINE_THROW(uint32_t)
3422iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl, uint64_t fLivenessEflBits)
3423{
3424 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fBitsInEfl);
3425 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3426 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3427
3428 /* Get the eflags. */
3429 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3430
3431 /* Test and jump. */
3432 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3433
3434 /* Free but don't flush the EFlags register. */
3435 iemNativeRegFreeTmp(pReNative, idxEflReg);
3436
3437 /* Make a copy of the core state now as we start the if-block. */
3438 iemNativeCondStartIfBlock(pReNative, off);
3439
3440 return off;
3441}
3442
3443
3444#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
3445 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits), iemNativeEflagsToLivenessMask<a_fBits>()); \
3446 do {
3447
3448/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
3449DECL_INLINE_THROW(uint32_t)
3450iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl, uint64_t fLivenessEflBits)
3451{
3452 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fBitsInEfl);
3453 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3454 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3455
3456 /* Get the eflags. */
3457 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3458
3459 /* Test and jump. */
3460 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3461
3462 /* Free but don't flush the EFlags register. */
3463 iemNativeRegFreeTmp(pReNative, idxEflReg);
3464
3465 /* Make a copy of the core state now as we start the if-block. */
3466 iemNativeCondStartIfBlock(pReNative, off);
3467
3468 return off;
3469}
3470
3471
3472#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
3473 off = iemNativeEmitIfEflagsBitSet(pReNative, off, iemNativeEflagsToSingleBitNo<a_fBit>(), \
3474 iemNativeEflagsToLivenessMask<a_fBit>()); \
3475 do {
3476
3477/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
3478DECL_INLINE_THROW(uint32_t)
3479iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, unsigned iBitNo, uint64_t fLivenessEflBit)
3480{
3481 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3482 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3483 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3484
3485 /* Get the eflags. */
3486 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3487
3488 /* Test and jump. */
3489 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3490
3491 /* Free but don't flush the EFlags register. */
3492 iemNativeRegFreeTmp(pReNative, idxEflReg);
3493
3494 /* Make a copy of the core state now as we start the if-block. */
3495 iemNativeCondStartIfBlock(pReNative, off);
3496
3497 return off;
3498}
3499
3500
3501#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
3502 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, iemNativeEflagsToSingleBitNo<a_fBit>(), \
3503 iemNativeEflagsToLivenessMask<a_fBit>()); \
3504 do {
3505
3506/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
3507DECL_INLINE_THROW(uint32_t)
3508iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, unsigned iBitNo, uint64_t fLivenessEflBit)
3509{
3510 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3511 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3512 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3513
3514 /* Get the eflags. */
3515 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3516
3517 /* Test and jump. */
3518 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3519
3520 /* Free but don't flush the EFlags register. */
3521 iemNativeRegFreeTmp(pReNative, idxEflReg);
3522
3523 /* Make a copy of the core state now as we start the if-block. */
3524 iemNativeCondStartIfBlock(pReNative, off);
3525
3526 return off;
3527}
3528
3529
3530#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
3531 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, false /*fInverted*/, \
3532 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3533 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3534 iemNativeEflagsToLivenessMask<a_fBit1 | a_fBit2>()); \
3535 do {
3536
3537#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
3538 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, true /*fInverted*/, \
3539 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3540 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3541 iemNativeEflagsToLivenessMask<a_fBit1 | a_fBit2>()); \
3542 do {
3543
3544/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
3545DECL_INLINE_THROW(uint32_t)
3546iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3547 bool fInverted, unsigned iBitNo1, unsigned iBitNo2, uint64_t fLivenessEflBits)
3548{
3549 Assert(iBitNo1 != iBitNo2);
3550 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3551 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3552 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3553
3554 /* Get the eflags. */
3555 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3556
3557#ifdef RT_ARCH_AMD64
3558 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, RT_BIT_64(iBitNo1));
3559
3560 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3561 if (iBitNo1 > iBitNo2)
3562 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3563 else
3564 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3565 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3566
3567#elif defined(RT_ARCH_ARM64)
3568 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3569 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3570
3571 /* and tmpreg, eflreg, #1<<iBitNo1 */
3572 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3573
3574 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3575 if (iBitNo1 > iBitNo2)
3576 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3577 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3578 else
3579 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3580 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3581
3582 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3583
3584#else
3585# error "Port me"
3586#endif
3587
3588 /* Test (bit iBitNo2 is set in tmpreg if the two flag bits differ) and jump. */
3589 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3590 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3591
3592 /* Free but don't flush the EFlags and tmp registers. */
3593 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3594 iemNativeRegFreeTmp(pReNative, idxEflReg);
3595
3596 /* Make a copy of the core state now as we start the if-block. */
3597 iemNativeCondStartIfBlock(pReNative, off);
3598
3599 return off;
3600}
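/*
 * Worked example of the bit-equality trick above (illustrative only; uEfl
 * stands in for the guest EFLAGS value), e.g. with iBitNo1 = X86_EFL_OF_BIT
 * (11) and iBitNo2 = X86_EFL_SF_BIT (7) as in the signed compare conditions:
 *
 *      uint32_t uTmp = uEfl & RT_BIT_32(11);       // isolate OF
 *      uTmp >>= 11 - 7;                            // move it down to bit 7
 *      uTmp ^= uEfl;                               // bit 7 now holds OF ^ SF
 *      bool const fNotEqual = (uTmp & RT_BIT_32(7)) != 0;
 *
 * so a single test of bit iBitNo2 in the temporary register decides between
 * the if- and else-branches for both the EQ and NE variants.
 */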
3601
3602
3603#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
3604 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, false /*fInverted*/, \
3605 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3606 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3607 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3608 iemNativeEflagsToLivenessMask<a_fBit | a_fBit1 | a_fBit2>()); \
3609 do {
3610
3611#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
3612 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, true /*fInverted*/, \
3613 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3614 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3615 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3616 iemNativeEflagsToLivenessMask<a_fBit | a_fBit1 | a_fBit2>()); \
3617 do {
3618
3619/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
3620 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
3621DECL_INLINE_THROW(uint32_t)
3622iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fInverted,
3623 unsigned iBitNo, unsigned iBitNo1, unsigned iBitNo2, uint64_t fLivenessEflBits)
3624{
3625 Assert(iBitNo1 != iBitNo);
3626 Assert(iBitNo2 != iBitNo);
3627 Assert(iBitNo2 != iBitNo1);
3628 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo) | RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3629 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo) | RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3630 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3631
3632    /* We need an if-block label for the inverted variant. */
3633 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
3634 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
3635
3636 /* Get the eflags. */
3637 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3638
3639#ifdef RT_ARCH_AMD64
3640 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, RT_BIT_64(iBitNo1)); /* This must come before we jump anywhere! */
3641#elif defined(RT_ARCH_ARM64)
3642 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3643#endif
3644
3645 /* Check for the lone bit first. */
3646 if (!fInverted)
3647 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3648 else
3649 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
3650
3651 /* Then extract and compare the other two bits. */
3652#ifdef RT_ARCH_AMD64
3653 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3654 if (iBitNo1 > iBitNo2)
3655 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3656 else
3657 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3658 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3659
3660#elif defined(RT_ARCH_ARM64)
3661 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3662
3663 /* and tmpreg, eflreg, #1<<iBitNo1 */
3664 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3665
3666    /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3667 if (iBitNo1 > iBitNo2)
3668 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3669 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3670 else
3671 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3672 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3673
3674 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3675
3676#else
3677# error "Port me"
3678#endif
3679
3680 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3681 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3682 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3683
3684 /* Free but don't flush the EFlags and tmp registers. */
3685 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3686 iemNativeRegFreeTmp(pReNative, idxEflReg);
3687
3688 /* Make a copy of the core state now as we start the if-block. */
3689 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
3690
3691 return off;
3692}
3693
3694
3695#define IEM_MC_IF_CX_IS_NZ() \
3696 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
3697 do {
3698
3699/** Emits code for IEM_MC_IF_CX_IS_NZ. */
3700DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3701{
3702 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3703
3704 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3705 kIemNativeGstRegUse_ReadOnly);
3706 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
3707 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3708
3709 iemNativeCondStartIfBlock(pReNative, off);
3710 return off;
3711}
3712
3713
3714#define IEM_MC_IF_ECX_IS_NZ() \
3715 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
3716 do {
3717
3718#define IEM_MC_IF_RCX_IS_NZ() \
3719 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
3720 do {
3721
3722/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
3723DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3724{
3725 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3726
3727 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3728 kIemNativeGstRegUse_ReadOnly);
3729 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
3730 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3731
3732 iemNativeCondStartIfBlock(pReNative, off);
3733 return off;
3734}
3735
3736
3737#define IEM_MC_IF_CX_IS_NOT_ONE() \
3738 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
3739 do {
3740
3741/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
3742DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3743{
3744 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3745
3746 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3747 kIemNativeGstRegUse_ReadOnly);
3748#ifdef RT_ARCH_AMD64
3749 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3750#else
3751 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3752 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3753 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3754#endif
3755 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3756
3757 iemNativeCondStartIfBlock(pReNative, off);
3758 return off;
3759}
3760
3761
3762#define IEM_MC_IF_ECX_IS_NOT_ONE() \
3763 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
3764 do {
3765
3766#define IEM_MC_IF_RCX_IS_NOT_ONE() \
3767 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
3768 do {
3769
3770/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
3771DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3772{
3773 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3774
3775 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3776 kIemNativeGstRegUse_ReadOnly);
3777 if (f64Bit)
3778 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3779 else
3780 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3781 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3782
3783 iemNativeCondStartIfBlock(pReNative, off);
3784 return off;
3785}
3786
3787
3788#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3789 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, \
3790 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3791 iemNativeEflagsToLivenessMask<a_fBit>()); \
3792 do {
3793
3794#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3795 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, \
3796 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3797 iemNativeEflagsToLivenessMask<a_fBit>()); \
3798 do {
3799
3800/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3801 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3802DECL_INLINE_THROW(uint32_t)
3803iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3804 bool fCheckIfSet, unsigned iBitNo, uint64_t fLivenessEflBit)
3805{
3806 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3807 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3808 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3809
3810 /* We have to load both RCX and EFLAGS before we can start branching,
3811 otherwise we'll end up in the else-block with an inconsistent
3812 register allocator state.
3813 Doing EFLAGS first as it's more likely to be loaded, right? */
3814 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3815 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3816 kIemNativeGstRegUse_ReadOnly);
3817
3818 /** @todo we could reduce this to a single branch instruction by spending a
3819 * temporary register and some setnz stuff. Not sure if loops are
3820 * worth it. */
3821 /* Check CX. */
3822#ifdef RT_ARCH_AMD64
3823 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3824#else
3825 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3826 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3827 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3828#endif
3829
3830 /* Check the EFlags bit. */
3831 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3832 !fCheckIfSet /*fJmpIfSet*/);
3833
3834 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3835 iemNativeRegFreeTmp(pReNative, idxEflReg);
3836
3837 iemNativeCondStartIfBlock(pReNative, off);
3838 return off;
3839}
3840
3841
3842#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3843 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, false /*f64Bit*/, \
3844 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3845 iemNativeEflagsToLivenessMask<a_fBit>()); \
3846 do {
3847
3848#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3849 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, false /*f64Bit*/, \
3850 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3851 iemNativeEflagsToLivenessMask<a_fBit>()); \
3852 do {
3853
3854#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3855 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, true /*f64Bit*/, \
3856 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3857 iemNativeEflagsToLivenessMask<a_fBit>()); \
3858 do {
3859
3860#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3861 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, true /*f64Bit*/, \
3862 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3863 iemNativeEflagsToLivenessMask<a_fBit>()); \
3864 do {
3865
3866/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3867 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3868 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3869 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3870DECL_INLINE_THROW(uint32_t)
3871iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fCheckIfSet, bool f64Bit,
3872 unsigned iBitNo, uint64_t fLivenessEFlBit)
3874{
3875 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3876 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3877 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3878
3879 /* We have to load both RCX and EFLAGS before we can start branching,
3880 otherwise we'll end up in the else-block with an inconsistent
3881 register allocator state.
3882 Doing EFLAGS first as it's more likely to be loaded, right? */
3883 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEFlBit);
3884 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3885 kIemNativeGstRegUse_ReadOnly);
3886
3887 /** @todo we could reduce this to a single branch instruction by spending a
3888 * temporary register and some setnz stuff. Not sure if loops are
3889 * worth it. */
3890 /* Check RCX/ECX. */
3891 if (f64Bit)
3892 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3893 else
3894 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3895
3896 /* Check the EFlags bit. */
3897 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3898 !fCheckIfSet /*fJmpIfSet*/);
3899
3900 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3901 iemNativeRegFreeTmp(pReNative, idxEflReg);
3902
3903 iemNativeCondStartIfBlock(pReNative, off);
3904 return off;
3905}
3906
3907
3908#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3909 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3910 do {
3911
3912/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3913DECL_INLINE_THROW(uint32_t)
3914iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3915{
3916 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3917
3918 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3919 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3920 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3921 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3922
3923 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3924
3925 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3926
3927 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3928
3929 iemNativeCondStartIfBlock(pReNative, off);
3930 return off;
3931}
3932
3933
3934#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3935 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3936 do {
3937
3938/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3939DECL_INLINE_THROW(uint32_t)
3940iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3941{
3942 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3943 Assert(iGReg < 16);
3944
3945 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3946 kIemNativeGstRegUse_ReadOnly);
3947
3948 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3949
3950 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3951
3952 iemNativeCondStartIfBlock(pReNative, off);
3953 return off;
3954}
3955
3956
3957
3958/*********************************************************************************************************************************
3959* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3960*********************************************************************************************************************************/
3961
3962#define IEM_MC_NOREF(a_Name) \
3963 RT_NOREF_PV(a_Name)
3964
3965#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3966 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3967
3968#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3969 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3970
3971#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3972 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3973
3974#define IEM_MC_LOCAL(a_Type, a_Name) \
3975 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3976
3977#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3978 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3979
3980#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3981 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3982
3983
3984/**
3985 * Sets the host register for @a idxVarRc to @a idxReg.
3986 *
3987 * Any guest register shadowing will be implicitly dropped by this call.
3988 *
3989 * The variable must not have any register associated with it (causes
3990 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3991 * implied.
3992 *
3993 * @returns idxReg
3994 * @param pReNative The recompiler state.
3995 * @param idxVar The variable.
3996 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3997 * @param off For recording in debug info.
3998 * @param fAllocated Set if the register is already allocated, false if not.
3999 *
4000 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
4001 */
4002DECL_INLINE_THROW(uint8_t)
4003iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off, bool fAllocated)
4004{
4005 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4006 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4007 Assert(!pVar->fRegAcquired);
4008 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4009 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
4010 AssertStmt(RT_BOOL(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)) == fAllocated,
4011 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
4012
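    /* Take ownership of the host register: drop any guest shadowing it has and mark it as allocated to this variable. */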
4013 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
4014 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
4015
4016 iemNativeVarSetKindToStack(pReNative, idxVar);
4017 pVar->idxReg = idxReg;
4018
4019 return idxReg;
4020}
4021
4022
4023/**
4024 * A convenience wrapper around iemNativeVarRegisterSet() that also marks the host register as acquired for the variable.
4025 */
4026DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
4027 uint8_t idxReg, uint32_t *poff)
4028{
4029 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff, false /*fAllocated*/);
4030 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
4031 return idxReg;
4032}
4033
4034
4035/**
4036 * This is called by IEM_MC_END() to clean up all variables.
4037 */
4038DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
4039{
4040 uint32_t const bmVars = pReNative->Core.bmVars;
4041 if (bmVars != 0)
4042 iemNativeVarFreeAllSlow(pReNative, bmVars);
4043 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
4044 Assert(pReNative->Core.bmStack == 0);
4045}
4046
4047
4048#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
4049
4050/**
4051 * This is called by IEM_MC_FREE_LOCAL.
4052 */
4053DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4054{
4055 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4056 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
4057 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
4058}
4059
4060
4061#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
4062
4063/**
4064 * This is called by IEM_MC_FREE_ARG.
4065 */
4066DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4067{
4068 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4069 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
4070 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
4071}
4072
4073
4074#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
4075
4076/**
4077 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
4078 */
4079DECL_INLINE_THROW(uint32_t)
4080iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
4081{
4082 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
4083 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
4084 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4085 Assert( pVarDst->cbVar == sizeof(uint16_t)
4086 || pVarDst->cbVar == sizeof(uint32_t));
4087
4088 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
4089 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
4090 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
4091 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
4092 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4093
4094 Assert(pVarDst->cbVar < pVarSrc->cbVar);
4095
4096 /*
4097 * Special case for immediates.
4098 */
4099 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
4100 {
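        /* No code needs to be emitted here; the constant is simply truncated at recompile time. */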
4101 switch (pVarDst->cbVar)
4102 {
4103 case sizeof(uint16_t):
4104 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
4105 break;
4106 case sizeof(uint32_t):
4107 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
4108 break;
4109 default: AssertFailed(); break;
4110 }
4111 }
4112 else
4113 {
4114 /*
4115 * The generic solution for now.
4116 */
4117 /** @todo optimize this by having the python script make sure the source
4118 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
4119 * statement. Then we could just transfer the register assignments. */
4120 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
4121 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
4122 switch (pVarDst->cbVar)
4123 {
4124 case sizeof(uint16_t):
4125 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
4126 break;
4127 case sizeof(uint32_t):
4128 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
4129 break;
4130 default: AssertFailed(); break;
4131 }
4132 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
4133 iemNativeVarRegisterRelease(pReNative, idxVarDst);
4134 }
4135 return off;
4136}
4137
4138
4139
4140/*********************************************************************************************************************************
4141* Emitters for IEM_MC_CALL_CIMPL_XXX *
4142*********************************************************************************************************************************/
4143
4144/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
4145DECL_INLINE_THROW(uint32_t)
4146iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
4147 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
4149{
4150 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
4151 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
4152
4153#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4154 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
4155       when a call clobbers any of the relevant control registers. */
4156# if 1
4157 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
4158 {
4159 /* Likely as long as call+ret are done via cimpl. */
4160 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
4161 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
4162 }
4163 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
4164 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4165 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
4166 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4167 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
4168 else
4169 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4170 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4171 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4172
4173# else
4174 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
4175 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4176 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
4177 pReNative->fSimdRaiseXcptChecksEmitted = 0;
4178 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
4179 || pfnCImpl == (uintptr_t)iemCImpl_callf
4180 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
4181 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
4182 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4183 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4184 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4185# endif
4186
4187# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
4188 /* Mark the host floating point control register as not synced if MXCSR is modified. */
4189 if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_MxCsr))
4190 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
4191# endif
4192#endif
4193
4194 /*
4195 * Do all the call setup and cleanup.
4196 */
4197 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
4198
4199 /*
4200 * Load the two or three hidden arguments.
4201 */
4202#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4203 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4204 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4205 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
4206#else
4207 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4208 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
4209#endif
4210
4211 /*
4212 * Make the call and check the return code.
4213 *
4214 * Shadow PC copies are always flushed here, other stuff depends on flags.
4215     * Segment and general purpose registers are explicitly flushed via the
4216 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
4217 * macros.
4218 */
4219 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
4220#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4221 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4222#endif
4223 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
4224 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
4225 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4226 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4227
4228#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
4229 pReNative->Core.fDebugPcInitialized = false;
4230 Log4(("fDebugPcInitialized=false cimpl off=%#x (v1)\n", off));
4231#endif
4232
4233 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4234}
4235
4236
4237#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4238 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
4239
4240/** Emits code for IEM_MC_CALL_CIMPL_1. */
4241DECL_INLINE_THROW(uint32_t)
4242iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4243 uintptr_t pfnCImpl, uint8_t idxArg0)
4244{
4245 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4246 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
4247}
4248
4249
4250#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4251 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
4252
4253/** Emits code for IEM_MC_CALL_CIMPL_2. */
4254DECL_INLINE_THROW(uint32_t)
4255iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4256 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
4257{
4258 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4259 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4260 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
4261}
4262
4263
4264#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4265 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4266 (uintptr_t)a_pfnCImpl, a0, a1, a2)
4267
4268/** Emits code for IEM_MC_CALL_CIMPL_3. */
4269DECL_INLINE_THROW(uint32_t)
4270iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4271 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4272{
4273 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4274 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4275 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4276 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
4277}
4278
4279
4280#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
4281 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4282 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
4283
4284/** Emits code for IEM_MC_CALL_CIMPL_4. */
4285DECL_INLINE_THROW(uint32_t)
4286iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4287 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4288{
4289 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4290 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4291 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4292 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4293 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
4294}
4295
4296
4297#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
4298 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4299 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
4300
4301/** Emits code for IEM_MC_CALL_CIMPL_5. */
4302DECL_INLINE_THROW(uint32_t)
4303iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4304 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
4305{
4306 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4307 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4308 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4309 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4310 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
4311 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
4312}
4313
4314
4315/** Recompiler debugging: Flush guest register shadow copies. */
4316#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
4317
4318
4319
4320/*********************************************************************************************************************************
4321* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
4322*********************************************************************************************************************************/
4323
4324/**
4325 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
4326 */
4327DECL_INLINE_THROW(uint32_t)
4328iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4329 uintptr_t pfnAImpl, uint8_t cArgs)
4330{
4331 if (idxVarRc != UINT8_MAX)
4332 {
4333 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
4334 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
4335 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
4336 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
4337 }
4338
4339 /*
4340 * Do all the call setup and cleanup.
4341 *
4342 * It is only required to flush pending guest register writes in call volatile registers as
4343     * assembly helpers can't throw and don't access anything living in CPUMCTX; they only
4344     * access their parameters. The flushing of call volatile registers is always done by iemNativeEmitCallCommon()
4345 * no matter the fFlushPendingWrites parameter.
4346 */
4347 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
4348
4349 /*
4350 * Make the call and update the return code variable if we've got one.
4351 */
4352 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnAImpl);
4353 if (idxVarRc != UINT8_MAX)
4354 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off, false /*fAllocated*/);
4355
4356 return off;
4357}
4358
4359
4360
4361#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
4362 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
4363
4364#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
4365 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
4366
4367/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
4368DECL_INLINE_THROW(uint32_t)
4369iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
4370{
4371 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
4372}
4373
4374
4375#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
4376 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
4377
4378#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
4379 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
4380
4381/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
4382DECL_INLINE_THROW(uint32_t)
4383iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
4384{
4385 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4386 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
4387}
4388
4389
4390#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
4391 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
4392
4393#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
4394 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
4395
4396/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
4397DECL_INLINE_THROW(uint32_t)
4398iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4399 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
4400{
4401 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4402 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4403 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
4404}
4405
4406
4407#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
4408 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
4409
4410#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
4411 IEM_MC_LOCAL(a_rcType, a_rc); \
4412 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
4413
4414/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
4415DECL_INLINE_THROW(uint32_t)
4416iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4417 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4418{
4419 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4420 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4421 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4422 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
4423}
4424
4425
4426#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
4427 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4428
4429#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
4430 IEM_MC_LOCAL(a_rcType, a_rc); \
4431 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4432
4433/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
4434DECL_INLINE_THROW(uint32_t)
4435iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4436 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4437{
4438 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4439 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4440 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4441 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
4442 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
4443}
4444
4445
4446
4447/*********************************************************************************************************************************
4448* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
4449*********************************************************************************************************************************/
4450
4451#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
4452 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
4453
4454#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4455 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
4456
4457#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4458 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
4459
4460#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4461 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
4462
4463
4464/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
4465 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
4466DECL_INLINE_THROW(uint32_t)
4467iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
4468{
4469 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4470 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4471 Assert(iGRegEx < 20);
4472
4473 /* Same discussion as in iemNativeEmitFetchGregU16 */
4474 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4475 kIemNativeGstRegUse_ReadOnly);
4476
4477 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4478 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4479
4480 /* The value is zero-extended to the full 64-bit host register width. */
4481 if (iGRegEx < 16)
4482 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4483 else
4484 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4485
4486 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4487 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4488 return off;
4489}
4490
4491
4492#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4493 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
4494
4495#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4496 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
4497
4498#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4499 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
4500
4501/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
4502DECL_INLINE_THROW(uint32_t)
4503iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
4504{
4505 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4506 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4507 Assert(iGRegEx < 20);
4508
4509 /* Same discussion as in iemNativeEmitFetchGregU16 */
4510 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4511 kIemNativeGstRegUse_ReadOnly);
4512
4513 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4514 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4515
4516 if (iGRegEx < 16)
4517 {
4518 switch (cbSignExtended)
4519 {
4520 case sizeof(uint16_t):
4521 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4522 break;
4523 case sizeof(uint32_t):
4524 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4525 break;
4526 case sizeof(uint64_t):
4527 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4528 break;
4529 default: AssertFailed(); break;
4530 }
4531 }
4532 else
4533 {
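        /* High byte register (AH/CH/DH/BH): move bits 15:8 down into the low byte of the variable register first, then sign-extend from there. */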
4534 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4535 switch (cbSignExtended)
4536 {
4537 case sizeof(uint16_t):
4538 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4539 break;
4540 case sizeof(uint32_t):
4541 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4542 break;
4543 case sizeof(uint64_t):
4544 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4545 break;
4546 default: AssertFailed(); break;
4547 }
4548 }
4549
4550 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4551 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4552 return off;
4553}
4554
4555
4556
4557#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
4558 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
4559
4560#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
4561 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4562
4563#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
4564 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4565
4566/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
4567DECL_INLINE_THROW(uint32_t)
4568iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4569{
4570 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4571 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4572 Assert(iGReg < 16);
4573
4574 /*
4575 * We can either just load the low 16-bit of the GPR into a host register
4576 * for the variable, or we can do so via a shadow copy host register. The
4577 * latter will avoid having to reload it if it's being stored later, but
4578 * will waste a host register if it isn't touched again. Since we don't
4579     * know what's going to happen, we choose the latter for now.
4580 */
4581 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4582 kIemNativeGstRegUse_ReadOnly);
4583
4584 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4585 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4586 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4587 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4588
4589 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4590 return off;
4591}
4592
4593#define IEM_MC_FETCH_GREG_I16(a_i16Dst, a_iGReg) \
4594 off = iemNativeEmitFetchGregI16(pReNative, off, a_i16Dst, a_iGReg)
4595
4596/** Emits code for IEM_MC_FETCH_GREG_I16. */
4597DECL_INLINE_THROW(uint32_t)
4598iemNativeEmitFetchGregI16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4599{
4600 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4601 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(int16_t));
4602 Assert(iGReg < 16);
4603
4604 /*
4605 * We can either just load the low 16-bit of the GPR into a host register
4606 * for the variable, or we can do so via a shadow copy host register. The
4607 * latter will avoid having to reload it if it's being stored later, but
4608 * will waste a host register if it isn't touched again. Since we don't
4609     * know what's going to happen, we choose the latter for now.
4610 */
4611 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4612 kIemNativeGstRegUse_ReadOnly);
4613
4614 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4615 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4616#ifdef RT_ARCH_AMD64
4617 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4618#elif defined(RT_ARCH_ARM64) /* Note! There are no 16-bit registers on ARM64; we emulate that through 32-bit registers, which requires sign extension. */
4619 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4620#endif
4621 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4622
4623 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4624 return off;
4625}
4626
4627
4628#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
4629 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4630
4631#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
4632 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4633
4634/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
4635DECL_INLINE_THROW(uint32_t)
4636iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
4637{
4638 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4639 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4640 Assert(iGReg < 16);
4641
4642 /*
4643 * We can either just load the low 16-bit of the GPR into a host register
4644 * for the variable, or we can do so via a shadow copy host register. The
4645 * latter will avoid having to reload it if it's being stored later, but
4646 * will waste a host register if it isn't touched again. Since we don't
4647     * know what's going to happen, we choose the latter for now.
4648 */
4649 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4650 kIemNativeGstRegUse_ReadOnly);
4651
4652 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4653 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4654 if (cbSignExtended == sizeof(uint32_t))
4655 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4656 else
4657 {
4658 Assert(cbSignExtended == sizeof(uint64_t));
4659 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4660 }
4661 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4662
4663 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4664 return off;
4665}
4666
4667
4668#define IEM_MC_FETCH_GREG_I32(a_i32Dst, a_iGReg) \
4669 off = iemNativeEmitFetchGregU32(pReNative, off, a_i32Dst, a_iGReg, sizeof(uint32_t))
4670
4671#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
4672 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
4673
4674#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
4675 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
4676
4677/** Emits code for IEM_MC_FETCH_GREG_I32, IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
4678DECL_INLINE_THROW(uint32_t)
4679iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4680{
4681 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4682 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4683 Assert(iGReg < 16);
4684
4685 /*
4686     * We can either just load the low 32-bit of the GPR into a host register
4687 * for the variable, or we can do so via a shadow copy host register. The
4688 * latter will avoid having to reload it if it's being stored later, but
4689 * will waste a host register if it isn't touched again. Since we don't
4690     * know what's going to happen, we choose the latter for now.
4691 */
4692 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4693 kIemNativeGstRegUse_ReadOnly);
4694
4695 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4696 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4697 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4698 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4699
4700 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4701 return off;
4702}
4703
4704
4705#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
4706 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
4707
4708/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
4709DECL_INLINE_THROW(uint32_t)
4710iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4711{
4712 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4713 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4714 Assert(iGReg < 16);
4715
4716 /*
4717 * We can either just load the low 32-bit of the GPR into a host register
4718 * for the variable, or we can do so via a shadow copy host register. The
4719 * latter will avoid having to reload it if it's being stored later, but
4720 * will waste a host register if it isn't touched again. Since we don't
4721     * know what's going to happen, we choose the latter for now.
4722 */
4723 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4724 kIemNativeGstRegUse_ReadOnly);
4725
4726 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4727 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4728 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4729 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4730
4731 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4732 return off;
4733}
4734
4735
4736#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
4737 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4738
4739#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
4740 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4741
4742/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
4743 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
4744DECL_INLINE_THROW(uint32_t)
4745iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4746{
4747 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4748 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4749 Assert(iGReg < 16);
4750
4751 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4752 kIemNativeGstRegUse_ReadOnly);
4753
4754 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4755 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4756 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
4757 /** @todo name the register a shadow one already? */
4758 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4759
4760 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4761 return off;
4762}
4763
4764
4765#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4766#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
4767 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
4768
4769/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
4770DECL_INLINE_THROW(uint32_t)
4771iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
4772{
4773 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4774 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4775 Assert(iGRegLo < 16 && iGRegHi < 16);
4776
4777 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4778 kIemNativeGstRegUse_ReadOnly);
4779 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4780 kIemNativeGstRegUse_ReadOnly);
4781
4782 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4783 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
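    /* Copy the two GPRs into the low and high 64-bit halves of the 128-bit destination. */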
4784 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
4785 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
4786
4787 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4788 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4789 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4790 return off;
4791}
4792#endif
4793
4794
4795/*********************************************************************************************************************************
4796* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
4797*********************************************************************************************************************************/
4798
4799#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
4800 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
4801
4802/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
4803DECL_INLINE_THROW(uint32_t)
4804iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
4805{
4806 Assert(iGRegEx < 20);
4807 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4808 kIemNativeGstRegUse_ForUpdate);
4809#ifdef RT_ARCH_AMD64
4810 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4811
4812 /* To the lowest byte of the register: mov r8, imm8 */
4813 if (iGRegEx < 16)
4814 {
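        /* A REX prefix (even an empty one) is required to address SPL/BPL/SIL/DIL as byte registers rather than AH/CH/DH/BH. */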
4815 if (idxGstTmpReg >= 8)
4816 pbCodeBuf[off++] = X86_OP_REX_B;
4817 else if (idxGstTmpReg >= 4)
4818 pbCodeBuf[off++] = X86_OP_REX;
4819 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4820 pbCodeBuf[off++] = u8Value;
4821 }
4822    /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
4823 else if (idxGstTmpReg < 4)
4824 {
4825 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4826 pbCodeBuf[off++] = u8Value;
4827 }
4828 else
4829 {
4830 /* ror reg64, 8 */
4831 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4832 pbCodeBuf[off++] = 0xc1;
4833 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4834 pbCodeBuf[off++] = 8;
4835
4836 /* mov reg8, imm8 */
4837 if (idxGstTmpReg >= 8)
4838 pbCodeBuf[off++] = X86_OP_REX_B;
4839 else if (idxGstTmpReg >= 4)
4840 pbCodeBuf[off++] = X86_OP_REX;
4841 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4842 pbCodeBuf[off++] = u8Value;
4843
4844 /* rol reg64, 8 */
4845 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4846 pbCodeBuf[off++] = 0xc1;
4847 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4848 pbCodeBuf[off++] = 8;
4849 }
4850
4851#elif defined(RT_ARCH_ARM64)
4852 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4853 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4854 if (iGRegEx < 16)
4855 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4856 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4857 else
4858 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4859 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4860 iemNativeRegFreeTmp(pReNative, idxImmReg);
4861
4862#else
4863# error "Port me!"
4864#endif
4865
4866 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4867
4868#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4869 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4870#endif
4871
4872 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4873 return off;
4874}
4875
4876
4877#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4878 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4879
4880/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4881DECL_INLINE_THROW(uint32_t)
4882iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4883{
4884 Assert(iGRegEx < 20);
4885 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4886
4887 /*
4888     * If it's a constant value (unlikely) we treat this as an
4889     * IEM_MC_STORE_GREG_U8_CONST statement.
4890 */
4891 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4892 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4893 { /* likely */ }
4894 else
4895 {
4896 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4897 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4898 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4899 }
4900
4901 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4902 kIemNativeGstRegUse_ForUpdate);
4903 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4904
4905#ifdef RT_ARCH_AMD64
4906 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4907 if (iGRegEx < 16)
4908 {
4909 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4910 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4911 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4912 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4913 pbCodeBuf[off++] = X86_OP_REX;
4914 pbCodeBuf[off++] = 0x8a;
4915 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4916 }
4917    /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
4918 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4919 {
4920 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4921 pbCodeBuf[off++] = 0x8a;
4922 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4923 }
4924 else
4925 {
4926 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4927
4928 /* ror reg64, 8 */
4929 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4930 pbCodeBuf[off++] = 0xc1;
4931 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4932 pbCodeBuf[off++] = 8;
4933
4934 /* mov reg8, reg8(r/m) */
4935 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4936 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4937 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4938 pbCodeBuf[off++] = X86_OP_REX;
4939 pbCodeBuf[off++] = 0x8a;
4940 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4941
4942 /* rol reg64, 8 */
4943 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4944 pbCodeBuf[off++] = 0xc1;
4945 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4946 pbCodeBuf[off++] = 8;
4947 }
4948
4949#elif defined(RT_ARCH_ARM64)
4950 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4951 or
4952 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4953 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4954 if (iGRegEx < 16)
4955 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4956 else
4957 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4958
4959#else
4960# error "Port me!"
4961#endif
4962 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4963
4964 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4965
4966#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4967 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4968#endif
4969 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4970 return off;
4971}
4972
4973
4974
4975#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4976 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4977
4978/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4979DECL_INLINE_THROW(uint32_t)
4980iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4981{
4982 Assert(iGReg < 16);
4983 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4984 kIemNativeGstRegUse_ForUpdate);
4985#ifdef RT_ARCH_AMD64
4986 /* mov reg16, imm16 */
4987 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4988 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4989 if (idxGstTmpReg >= 8)
4990 pbCodeBuf[off++] = X86_OP_REX_B;
4991 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4992 pbCodeBuf[off++] = RT_BYTE1(uValue);
4993 pbCodeBuf[off++] = RT_BYTE2(uValue);
4994
4995#elif defined(RT_ARCH_ARM64)
4996 /* movk xdst, #uValue, lsl #0 */
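    /* Note: MOVK only replaces bits 15:0 of the destination and leaves bits 63:16
       untouched, which is exactly the 16-bit store semantic we need here. */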
4997 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4998 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4999
5000#else
5001# error "Port me!"
5002#endif
5003
5004 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5005
5006#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5007 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5008#endif
5009 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5010 return off;
5011}
5012
5013
5014#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
5015 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
5016
5017/** Emits code for IEM_MC_STORE_GREG_U16. */
5018DECL_INLINE_THROW(uint32_t)
5019iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5020{
5021 Assert(iGReg < 16);
5022 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5023
5024 /*
5025     * If it's a constant value (unlikely) we treat this as an
5026     * IEM_MC_STORE_GREG_U16_CONST statement.
5027 */
5028 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5029 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5030 { /* likely */ }
5031 else
5032 {
5033 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5034 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5035 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
5036 }
5037
5038 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5039 kIemNativeGstRegUse_ForUpdate);
5040
5041#ifdef RT_ARCH_AMD64
5042 /* mov reg16, reg16 or [mem16] */
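    /* Unlike the ARM64 path below we do not acquire a host register for the value
       variable: if it already lives in a register we read it from there, otherwise
       we load it directly from its BP-relative stack slot. */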
5043 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
5044 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5045 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5046 {
5047 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
5048 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
5049 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
5050 pbCodeBuf[off++] = 0x8b;
5051 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
5052 }
5053 else
5054 {
5055 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
5056 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
5057 if (idxGstTmpReg >= 8)
5058 pbCodeBuf[off++] = X86_OP_REX_R;
5059 pbCodeBuf[off++] = 0x8b;
5060 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
5061 }
5062
5063#elif defined(RT_ARCH_ARM64)
5064 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
5065 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
5066 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5067 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
5068 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5069
5070#else
5071# error "Port me!"
5072#endif
5073
5074 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5075
5076#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5077 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5078#endif
5079 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5080 return off;
5081}
5082
5083
5084#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
5085 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
5086
5087/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
5088DECL_INLINE_THROW(uint32_t)
5089iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
5090{
5091 Assert(iGReg < 16);
5092 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5093 kIemNativeGstRegUse_ForFullWrite);
5094 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5095#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5096 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5097#endif
5098 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5099 return off;
5100}
5101
5102
5103#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
5104 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
5105
5106#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
5107 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
5108
5109/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
5110DECL_INLINE_THROW(uint32_t)
5111iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5112{
5113 Assert(iGReg < 16);
5114 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5115
5116 /*
5117     * If it's a constant value (unlikely) we treat this as an
5118     * IEM_MC_STORE_GREG_U32_CONST statement.
5119 */
5120 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5121 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5122 { /* likely */ }
5123 else
5124 {
5125 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5126 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5127 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
5128 }
5129
5130 /*
5131     * For the rest we allocate a guest register for the variable and write
5132     * it to the CPUMCTX structure.
5133 */
5134 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5135#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5136 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5137#else
5138 RT_NOREF(idxVarReg);
5139#endif
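/* Strict builds only: the variable's host register doubles as the full 64-bit guest
   shadow from here on, so verify that its upper 32 bits are clear (a 32-bit GPR
   store is expected to zero-extend). */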
5140#ifdef VBOX_STRICT
5141 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
5142#endif
5143 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5144 return off;
5145}
5146
5147
5148#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
5149 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
5150
5151/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
5152DECL_INLINE_THROW(uint32_t)
5153iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
5154{
5155 Assert(iGReg < 16);
5156 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5157 kIemNativeGstRegUse_ForFullWrite);
5158 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5159#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5160 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5161#endif
5162 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5163 return off;
5164}
5165
5166
5167#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
5168 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
5169
5170#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
5171 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
5172
5173/** Emits code for IEM_MC_STORE_GREG_U64/IEM_MC_STORE_GREG_I64. */
5174DECL_INLINE_THROW(uint32_t)
5175iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5176{
5177 Assert(iGReg < 16);
5178 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5179
5180 /*
5181     * If it's a constant value (unlikely) we treat this as an
5182     * IEM_MC_STORE_GREG_U64_CONST statement.
5183 */
5184 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5185 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5186 { /* likely */ }
5187 else
5188 {
5189 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5190 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5191 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
5192 }
5193
5194 /*
5195     * For the rest we allocate a guest register for the variable and write
5196     * it to the CPUMCTX structure.
5197 */
5198 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5199#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5200 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5201#else
5202 RT_NOREF(idxVarReg);
5203#endif
5204 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5205 return off;
5206}
5207
5208
5209#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
5210 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
5211
5212/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
5213DECL_INLINE_THROW(uint32_t)
5214iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
5215{
5216 Assert(iGReg < 16);
5217 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5218 kIemNativeGstRegUse_ForUpdate);
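    /* A 32-bit register-to-itself move zero-extends, i.e. it clears bits 63:32 on
       both AMD64 and ARM64, which is all IEM_MC_CLEAR_HIGH_GREG_U64 has to do. */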
5219 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
5220#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5221 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5222#endif
5223 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5224 return off;
5225}
5226
5227
5228#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5229#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
5230 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
5231
5232/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
5233DECL_INLINE_THROW(uint32_t)
5234iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
5235{
5236 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5237 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
5238 Assert(iGRegLo < 16 && iGRegHi < 16);
5239
5240 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
5241 kIemNativeGstRegUse_ForFullWrite);
5242 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
5243 kIemNativeGstRegUse_ForFullWrite);
5244
5245 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5246 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
5247 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
5248 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
5249
5250 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
5251 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
5252 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
5253 return off;
5254}
5255#endif
5256
5257
5258/*********************************************************************************************************************************
5259* General purpose register manipulation (add, sub). *
5260*********************************************************************************************************************************/
5261
5262#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5263 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5264
5265/** Emits code for IEM_MC_ADD_GREG_U16. */
5266DECL_INLINE_THROW(uint32_t)
5267iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
5268{
5269 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5270 kIemNativeGstRegUse_ForUpdate);
5271
5272#ifdef RT_ARCH_AMD64
5273 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5274 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5275 if (idxGstTmpReg >= 8)
5276 pbCodeBuf[off++] = X86_OP_REX_B;
5277 if (uAddend == 1)
5278 {
5279 pbCodeBuf[off++] = 0xff; /* inc */
5280 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5281 }
5282 else
5283 {
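        /* add r/m16, imm16 - the operand size prefix emitted above shrinks the
           immediate to 16 bits, hence the single zero byte following uAddend. */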
5284 pbCodeBuf[off++] = 0x81;
5285 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5286 pbCodeBuf[off++] = uAddend;
5287 pbCodeBuf[off++] = 0;
5288 }
5289
5290#else
5291 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5292 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5293
5294    /* add tmp, gstgrp, uAddend */
5295 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
5296
5297    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
5298 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5299
5300 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5301#endif
5302
5303 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5304
5305#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5306 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5307#endif
5308
5309 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5310 return off;
5311}
5312
5313
5314#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
5315 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5316
5317#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
5318 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5319
5320/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
5321DECL_INLINE_THROW(uint32_t)
5322iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
5323{
5324 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5325 kIemNativeGstRegUse_ForUpdate);
5326
5327#ifdef RT_ARCH_AMD64
5328 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5329 if (f64Bit)
5330 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5331 else if (idxGstTmpReg >= 8)
5332 pbCodeBuf[off++] = X86_OP_REX_B;
5333 if (uAddend == 1)
5334 {
5335 pbCodeBuf[off++] = 0xff; /* inc */
5336 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5337 }
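    /* Note: 0x83 sign-extends its imm8, so it is only usable for addends below 0x80;
       larger values fall back to the 0x81 imm32 form below. */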
5338 else if (uAddend < 128)
5339 {
5340 pbCodeBuf[off++] = 0x83; /* add */
5341 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5342 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5343 }
5344 else
5345 {
5346 pbCodeBuf[off++] = 0x81; /* add */
5347 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5348 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5349 pbCodeBuf[off++] = 0;
5350 pbCodeBuf[off++] = 0;
5351 pbCodeBuf[off++] = 0;
5352 }
5353
5354#else
5355    /* add gstgrp, gstgrp, uAddend */
5356 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5357 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
5358
5359#endif
5360
5361 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5362
5363#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5364 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5365#endif
5366
5367 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5368 return off;
5369}
5370
5371
5372
5373#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5374 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5375
5376/** Emits code for IEM_MC_SUB_GREG_U16. */
5377DECL_INLINE_THROW(uint32_t)
5378iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5379{
5380 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5381 kIemNativeGstRegUse_ForUpdate);
5382
5383#ifdef RT_ARCH_AMD64
5384 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5385 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5386 if (idxGstTmpReg >= 8)
5387 pbCodeBuf[off++] = X86_OP_REX_B;
5388 if (uSubtrahend == 1)
5389 {
5390 pbCodeBuf[off++] = 0xff; /* dec */
5391 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5392 }
5393 else
5394 {
5395 pbCodeBuf[off++] = 0x81;
5396 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5397 pbCodeBuf[off++] = uSubtrahend;
5398 pbCodeBuf[off++] = 0;
5399 }
5400
5401#else
5402 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5403 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5404
5405 /* sub tmp, gstgrp, uSubtrahend */
5406 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5407
5408    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
5409 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5410
5411 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5412#endif
5413
5414 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5415
5416#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5417 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5418#endif
5419
5420 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5421 return off;
5422}
5423
5424
5425#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5426 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5427
5428#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5429 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5430
5431/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5432DECL_INLINE_THROW(uint32_t)
5433iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5434{
5435 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5436 kIemNativeGstRegUse_ForUpdate);
5437
5438#ifdef RT_ARCH_AMD64
5439 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5440 if (f64Bit)
5441 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5442 else if (idxGstTmpReg >= 8)
5443 pbCodeBuf[off++] = X86_OP_REX_B;
5444 if (uSubtrahend == 1)
5445 {
5446 pbCodeBuf[off++] = 0xff; /* dec */
5447 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5448 }
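    /* Note: 0x83 sign-extends its imm8; subtrahends of 0x80 and above need the
       0x81 imm32 form below (same consideration as in iemNativeEmitAddGregU32U64). */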
5449 else if (uSubtrahend < 128)
5450 {
5451 pbCodeBuf[off++] = 0x83; /* sub */
5452 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5453 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5454 }
5455 else
5456 {
5457 pbCodeBuf[off++] = 0x81; /* sub */
5458 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5459 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5460 pbCodeBuf[off++] = 0;
5461 pbCodeBuf[off++] = 0;
5462 pbCodeBuf[off++] = 0;
5463 }
5464
5465#else
5466    /* sub gstgrp, gstgrp, uSubtrahend */
5467 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5468 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
5469
5470#endif
5471
5472 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5473
5474#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5475 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5476#endif
5477
5478 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5479 return off;
5480}
5481
5482
5483#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
5484 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5485
5486#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
5487 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5488
5489#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
5490 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5491
5492#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
5493 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5494
5495/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
5496DECL_INLINE_THROW(uint32_t)
5497iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5498{
5499#ifdef VBOX_STRICT
5500 switch (cbMask)
5501 {
5502 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5503 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5504 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5505 case sizeof(uint64_t): break;
5506 default: AssertFailedBreak();
5507 }
5508#endif
5509
5510 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5511 kIemNativeGstRegUse_ForUpdate);
5512
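    /* For example, IEM_MC_AND_GREG_U8(X86_GREG_xCX, 0x0f) ends up AND'ing the full
       64-bit guest RCX shadow with 0xffffffffffffff0f, so only bits 7:0 (CL) can
       actually change. */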
5513 switch (cbMask)
5514 {
5515 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5516 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
5517 break;
5518 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
5519 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
5520 break;
5521 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5522 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5523 break;
5524 case sizeof(uint64_t):
5525 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
5526 break;
5527 default: AssertFailedBreak();
5528 }
5529
5530 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5531
5532#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5533 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5534#endif
5535
5536 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5537 return off;
5538}
5539
5540
5541#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
5542 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5543
5544#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
5545 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5546
5547#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
5548 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5549
5550#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
5551 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5552
5553/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
5554DECL_INLINE_THROW(uint32_t)
5555iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5556{
5557#ifdef VBOX_STRICT
5558 switch (cbMask)
5559 {
5560 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5561 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5562 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5563 case sizeof(uint64_t): break;
5564 default: AssertFailedBreak();
5565 }
5566#endif
5567
5568 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5569 kIemNativeGstRegUse_ForUpdate);
5570
5571 switch (cbMask)
5572 {
5573 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5574 case sizeof(uint16_t):
5575 case sizeof(uint64_t):
5576 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
5577 break;
5578 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5579 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5580 break;
5581 default: AssertFailedBreak();
5582 }
5583
5584 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5585
5586#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5587 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5588#endif
5589
5590 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5591 return off;
5592}
5593
5594
5595/*********************************************************************************************************************************
5596* Local/Argument variable manipulation (add, sub, and, or). *
5597*********************************************************************************************************************************/
5598
5599#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
5600 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5601
5602#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
5603 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5604
5605#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
5606 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5607
5608#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
5609 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5610
5611
5612#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
5613 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
5614
5615#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
5616 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
5617
5618#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
5619 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
5620
5621/** Emits code for AND'ing a local and a constant value. */
5622DECL_INLINE_THROW(uint32_t)
5623iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5624{
5625#ifdef VBOX_STRICT
5626 switch (cbMask)
5627 {
5628 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5629 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5630 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5631 case sizeof(uint64_t): break;
5632 default: AssertFailedBreak();
5633 }
5634#endif
5635
5636 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5637 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5638
5639 if (cbMask <= sizeof(uint32_t))
5640 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
5641 else
5642 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
5643
5644 iemNativeVarRegisterRelease(pReNative, idxVar);
5645 return off;
5646}
5647
5648
5649#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
5650 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5651
5652#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
5653 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5654
5655#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
5656 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5657
5658#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
5659 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5660
5661/** Emits code for OR'ing a local and a constant value. */
5662DECL_INLINE_THROW(uint32_t)
5663iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5664{
5665#ifdef VBOX_STRICT
5666 switch (cbMask)
5667 {
5668 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5669 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5670 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5671 case sizeof(uint64_t): break;
5672 default: AssertFailedBreak();
5673 }
5674#endif
5675
5676 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5677 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5678
5679 if (cbMask <= sizeof(uint32_t))
5680 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
5681 else
5682 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
5683
5684 iemNativeVarRegisterRelease(pReNative, idxVar);
5685 return off;
5686}
5687
5688
5689#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
5690 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
5691
5692#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
5693 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
5694
5695#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
5696 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
5697
5698/** Emits code for reversing the byte order in a local value. */
5699DECL_INLINE_THROW(uint32_t)
5700iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
5701{
5702 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5703 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5704
5705 switch (cbLocal)
5706 {
5707 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
5708 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
5709 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
5710 default: AssertFailedBreak();
5711 }
5712
5713 iemNativeVarRegisterRelease(pReNative, idxVar);
5714 return off;
5715}
5716
5717
5718#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
5719 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5720
5721#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
5722 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5723
5724#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
5725 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5726
5727/** Emits code for shifting left a local value. */
5728DECL_INLINE_THROW(uint32_t)
5729iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5730{
5731#ifdef VBOX_STRICT
5732 switch (cbLocal)
5733 {
5734 case sizeof(uint8_t): Assert(cShift < 8); break;
5735 case sizeof(uint16_t): Assert(cShift < 16); break;
5736 case sizeof(uint32_t): Assert(cShift < 32); break;
5737 case sizeof(uint64_t): Assert(cShift < 64); break;
5738 default: AssertFailedBreak();
5739 }
5740#endif
5741
5742 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5743 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5744
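    /* The shift is done on the full 32-bit (or 64-bit) host register, so for 8/16-bit
       locals we mask off whatever was shifted beyond the local's width afterwards. */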
5745 if (cbLocal <= sizeof(uint32_t))
5746 {
5747 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
5748 if (cbLocal < sizeof(uint32_t))
5749 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
5750 cbLocal == sizeof(uint16_t)
5751 ? UINT32_C(0xffff)
5752 : UINT32_C(0xff));
5753 }
5754 else
5755 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
5756
5757 iemNativeVarRegisterRelease(pReNative, idxVar);
5758 return off;
5759}
5760
5761
5762#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
5763 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5764
5765#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
5766 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5767
5768#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
5769 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5770
5771/** Emits code for arithmetically shifting right a local value. */
5772DECL_INLINE_THROW(uint32_t)
5773iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5774{
5775#ifdef VBOX_STRICT
5776 switch (cbLocal)
5777 {
5778 case sizeof(int8_t): Assert(cShift < 8); break;
5779 case sizeof(int16_t): Assert(cShift < 16); break;
5780 case sizeof(int32_t): Assert(cShift < 32); break;
5781 case sizeof(int64_t): Assert(cShift < 64); break;
5782 default: AssertFailedBreak();
5783 }
5784#endif
5785
5786 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5787 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5788
5789 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
5790 if (cbLocal == sizeof(uint8_t))
5791 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
5792 else if (cbLocal == sizeof(uint16_t))
5793 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
5794
5795 if (cbLocal <= sizeof(uint32_t))
5796 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
5797 else
5798 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
5799
5800 iemNativeVarRegisterRelease(pReNative, idxVar);
5801 return off;
5802}
5803
5804
5805#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
5806 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
5807
5808#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
5809 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
5810
5811#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
5812 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
5813
5814/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
5815DECL_INLINE_THROW(uint32_t)
5816iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
5817{
5818 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
5819 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
5820 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5821 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5822
5823 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5824 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
5825
5826 /* Need to sign extend the value. */
5827 if (cbLocal <= sizeof(uint32_t))
5828 {
5829/** @todo ARM64: In case of boredom, the extended add instruction can do the
5830 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5831 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5832
5833 switch (cbLocal)
5834 {
5835 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5836 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5837 default: AssertFailed();
5838 }
5839
5840 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5841 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5842 }
5843 else
5844 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5845
5846 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5847 iemNativeVarRegisterRelease(pReNative, idxVar);
5848 return off;
5849}
5850
5851
5852
5853/*********************************************************************************************************************************
5854* EFLAGS *
5855*********************************************************************************************************************************/
5856
5857#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5858# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5859#else
5860# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5861 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5862
5863DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5864{
5865 if (fEflOutput)
5866 {
5867 PVMCPUCC const pVCpu = pReNative->pVCpu;
5868# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5869 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5870 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5871 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5872# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5873 if (fEflOutput & (a_fEfl)) \
5874 { \
5875 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5876 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5877 else \
5878 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5879 } else do { } while (0)
5880# else
5881 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5882 IEMLIVENESSBIT const LivenessClobbered = { IEMLIVENESS_STATE_GET_WILL_BE_CLOBBERED_SET(pLivenessEntry) };
5883 IEMLIVENESSBIT const LivenessDelayable = { IEMLIVENESS_STATE_GET_CAN_BE_POSTPONED_SET(pLivenessEntry) };
5884# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5885 if (fEflOutput & (a_fEfl)) \
5886 { \
5887 if (LivenessClobbered.a_fLivenessMember) \
5888 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5889 else if (LivenessDelayable.a_fLivenessMember) \
5890 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5891 else \
5892 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5893 } else do { } while (0)
5894# endif
5895 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5896 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5897 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5898 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5899 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5900 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5901 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5902# undef CHECK_FLAG_AND_UPDATE_STATS
5903 }
5904 RT_NOREF(fEflInput);
5905}
5906#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5907
5908#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5909#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5910 off = iemNativeEmitFetchEFlags<a_fEflInput, iemNativeEflagsToLivenessMask<a_fEflInput>(),\
5911 a_fEflOutput, iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags)
5912
5913/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5914template<uint32_t const a_fEflInput, uint64_t const a_fLivenessEflInput,
5915 uint32_t const a_fEflOutput, uint64_t const a_fLivenessEflOutput>
5916DECL_INLINE_THROW(uint32_t)
5917iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
5918{
5919 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5920 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5921 /** @todo fix NOT AssertCompile(a_fEflInput != 0 || a_fEflOutput != 0); */
5922
5923#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5924# ifdef VBOX_STRICT
5925 if ( pReNative->idxCurCall != 0
5926 && (a_fEflInput != 0 || a_fEflOutput != 0) /* for NOT these are both zero for now. */)
5927 {
5928 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5929 RT_CONSTEXPR uint32_t const fBoth = a_fEflInput | a_fEflOutput;
5930# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5931 AssertMsg( !(fBoth & (a_fElfConst)) \
5932 || (!(a_fEflInput & (a_fElfConst)) \
5933 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5934 : !(a_fEflOutput & (a_fElfConst)) \
5935 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5936 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5937 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5938 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5939 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5940 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5941 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5942 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5943 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5944 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5945# undef ASSERT_ONE_EFL
5946 }
5947# endif
5948#endif
5949
5950 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, a_fEflInput);
5951 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, a_fEflInput);
5952
5953 /** @todo This could be prettier...*/
5954 /** @todo Also, the shadowing+liveness handling of EFlags is currently
5955 * problematic, but I'll try tackle that soon (@bugref{10720}). */
5956 PCIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarEFlags)];
5957 Assert(pVar->enmKind == kIemNativeVarKind_Invalid || pVar->enmKind == kIemNativeVarKind_Stack);
5958 Assert(pVar->idxReg == UINT8_MAX);
5959 if (pVar->uArgNo >= IEMNATIVE_CALL_ARG_GREG_COUNT)
5960 {
5961 /** @todo We could use kIemNativeGstRegUse_ReadOnly here when fOutput is
5962 * zero, but since iemNativeVarRegisterSet clears the shadowing,
5963 * that's counter productive... */
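        /* Hand the host register currently shadowing EFLAGS over to the variable;
           this saves an explicit register-to-register copy. */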
5964 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
5965 a_fLivenessEflInput, a_fLivenessEflOutput);
5966 iemNativeVarRegisterSet(pReNative, idxVarEFlags, idxGstReg, off, true /*fAllocated*/);
5967 }
5968 else
5969 {
5970 /* Register argument variable: Avoid assertions in generic call code and load it the traditional way. */
5971 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
5972 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent(pReNative, &off,
5973 a_fLivenessEflInput, a_fLivenessEflOutput);
5974 if (idxGstReg != UINT8_MAX)
5975 {
5976 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstReg);
5977 iemNativeRegFreeTmp(pReNative, idxGstReg);
5978 }
5979 else
5980 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxVarReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5981 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5982 }
5983 return off;
5984}
5985
5986
5987
5988/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5989 * start using it with custom native code emission (inlining assembly
5990 * instruction helpers). */
5991#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5992#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5993 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5994 off = iemNativeEmitCommitEFlags<true /*a_fUpdateSkippingAndPostponing*/, a_fEflOutput, \
5995 iemNativeEflagsToLivenessMask<a_fEflInput>(), \
5996 iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags, a_fEflInput)
5997
5998#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5999#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
6000 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
6001 off = iemNativeEmitCommitEFlags<false /*a_fUpdateSkippingAndPostponing*/, a_fEflOutput, \
6002 iemNativeEflagsToLivenessMask<a_fEflInput>(), \
6003 iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags, a_fEflInput)
6004
6005/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
6006template<bool const a_fUpdateSkippingAndPostponing, uint32_t const a_fEflOutput,
6007 uint64_t const a_fLivenessEflInputBits, uint64_t const a_fLivenessEflOutputBits>
6008DECL_INLINE_THROW(uint32_t)
6009iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflInput)
6010{
6011 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
6012 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
6013
6014#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6015# ifdef VBOX_STRICT
6016 if ( pReNative->idxCurCall != 0
6017 && (a_fLivenessEflInputBits != 0 || a_fLivenessEflOutputBits != 0) /* for NOT these are both zero for now. */)
6018 {
6019 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
6020# define ASSERT_ONE_EFL(a_idxField) \
6021 if RT_CONSTEXPR_IF(((a_fLivenessEflInputBits | a_fLivenessEflOutputBits) & RT_BIT_64(a_idxField)) != 0) \
6022 AssertMsg(!(a_fLivenessEflInputBits & RT_BIT_64(a_idxField)) \
6023 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
6024 : !(a_fLivenessEflOutputBits & RT_BIT_64(a_idxField)) \
6025 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
6026 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)), \
6027 ("%s - %u\n", #a_idxField, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
6028 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
6029 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
6030 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
6031 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
6032 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
6033 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
6034 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
6035# undef ASSERT_ONE_EFL
6036 }
6037# endif
6038#endif
6039
6040#ifdef VBOX_STRICT
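    /* Sanity: trap (brk 0x2001) if the reserved-always-one flag (EFLAGS bit 1) is
       clear, and (brk 0x2002) if any reserved-must-be-zero hardware bits are set
       in the value being committed. */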
6041 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
6042 uint32_t offFixup = off;
6043 off = iemNativeEmitJnzToFixed(pReNative, off, off);
6044 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
6045 iemNativeFixupFixedJump(pReNative, offFixup, off);
6046
6047 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
6048 offFixup = off;
6049 off = iemNativeEmitJzToFixed(pReNative, off, off);
6050 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
6051 iemNativeFixupFixedJump(pReNative, offFixup, off);
6052
6053 /** @todo validate that only bits in the a_fEflOutput mask changed. */
6054#endif
6055
6056#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6057 if RT_CONSTEXPR_IF(a_fUpdateSkippingAndPostponing)
6058 {
6059        Assert(!(pReNative->fSkippingEFlags & fEflInput)); RT_NOREF(fEflInput);
6060 if (pReNative->fSkippingEFlags)
6061 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitCommitEFlags)\n",
6062 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~(a_fEflOutput & X86_EFL_STATUS_BITS) ));
6063 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6064 pReNative->fSkippingEFlags = 0;
6065 else
6066 pReNative->fSkippingEFlags &= ~(a_fEflOutput & X86_EFL_STATUS_BITS);
6067# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6068 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6069 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6070 else
6071 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(a_fEflOutput & X86_EFL_STATUS_BITS),
6072 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6073# endif
6074 IEMNATIVE_CLEAR_POSTPONED_EFLAGS(pReNative, a_fEflOutput);
6075 }
6076#endif
6077
6078 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
6079 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
6080 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
6081 return off;
6082}
6083
6084
6085typedef enum IEMNATIVEMITEFLOP
6086{
6087 kIemNativeEmitEflOp_Set,
6088 kIemNativeEmitEflOp_Clear,
6089 kIemNativeEmitEflOp_Flip
6090} IEMNATIVEMITEFLOP;
6091
6092#define IEM_MC_SET_EFL_BIT(a_fBit) \
6093 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Set, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6094
6095#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
6096 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Clear, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6097
6098#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
6099 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Flip, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6100
6101/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
6102template<IEMNATIVEMITEFLOP const a_enmOp, uint32_t const a_fEflBit, uint64_t const a_fLivenessEflBit>
6103DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6104{
6105 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
6106 a_enmOp == kIemNativeEmitEflOp_Flip
6107 ? a_fLivenessEflBit : 0,
6108 a_fLivenessEflBit);
6109
6110 /* Using 'if constexpr' forces code elimination in debug builds with VC. */
6111 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Set)
6112 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, a_fEflBit);
6113 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Clear)
6114 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~a_fEflBit);
6115 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Flip)
6116 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, a_fEflBit);
6117 else
6118 AssertCompile( a_enmOp == kIemNativeEmitEflOp_Set /* AssertCompile(false) works with VC 2019 but not clang 15. */
6119 || a_enmOp == kIemNativeEmitEflOp_Clear
6120 || a_enmOp == kIemNativeEmitEflOp_Flip);
6121
6122 /** @todo No delayed writeback for EFLAGS right now. */
6123 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
6124
6125 /* Free but don't flush the EFLAGS register. */
6126 iemNativeRegFreeTmp(pReNative, idxEflReg);
6127
6128#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6129 /* Clear the bit in the skipped mask if we're clobbering and it's a status bit. */
6130 if RT_CONSTEXPR_IF( (a_enmOp == kIemNativeEmitEflOp_Set || a_enmOp == kIemNativeEmitEflOp_Clear)
6131 && (a_fEflBit & X86_EFL_STATUS_BITS))
6132 {
6133 if (pReNative->fSkippingEFlags)
6134 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitModifyEFlagsBit)\n",
6135 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~a_fEflBit ));
6136 pReNative->fSkippingEFlags &= ~a_fEflBit;
6137# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6138 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~a_fEflBit, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6139# endif
6140 }
6141#endif
6142
6143 return off;
6144}
6145
6146
6147/*********************************************************************************************************************************
6148* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
6149*********************************************************************************************************************************/
6150
6151#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
6152 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
6153
6154#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
6155 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
6156
6157#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
6158 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
6159
6160
6161/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
6162 * IEM_MC_FETCH_SREG_ZX_U64. */
6163DECL_INLINE_THROW(uint32_t)
6164iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
6165{
6166 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6167 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
6168 Assert(iSReg < X86_SREG_COUNT);
6169
6170 /*
6171 * For now, we will not create a shadow copy of a selector. The rationale
6172 * is that since we do not recompile the popping and loading of segment
6173 * registers and that the IEM_MC_FETCH_SREG_U* MCs are only used for
6174 * pushing and moving to registers, there is only a small chance that the
6175 * shadow copy will be accessed again before the register is reloaded. One
6176 * scenario would be nested calls in 16-bit code, but I doubt it's worth
6177 * the extra register pressure atm.
6178 *
6179 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
6180 * and iemNativeVarRegisterAcquire for a load scenario. We only got the
6181 * store scenario covered at present (r160730).
6182 */
6183 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6184 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6185 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
6186 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6187 return off;
6188}
6189
6190
6191
6192/*********************************************************************************************************************************
6193* Register references. *
6194*********************************************************************************************************************************/
6195
6196#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
6197 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
6198
6199#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
6200 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
6201
6202/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
6203DECL_INLINE_THROW(uint32_t)
6204iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
6205{
6206 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
6207 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6208 Assert(iGRegEx < 20);
6209
6210 if (iGRegEx < 16)
6211 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6212 else
6213 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
6214
6215 /* If we've delayed writing back the register value, flush it now. */
6216 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6217
6218 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6219 if (!fConst)
6220 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
6221
6222 return off;
6223}
6224
6225#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
6226 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
6227
6228#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
6229 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
6230
6231#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
6232 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
6233
6234#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
6235 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
6236
6237#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
6238 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
6239
6240#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
6241 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
6242
6243#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
6244 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
6245
6246#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
6247 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
6248
6249#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
6250 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
6251
6252#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
6253 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
6254
6255/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
6256DECL_INLINE_THROW(uint32_t)
6257iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
6258{
6259 Assert(iGReg < 16);
6260 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
6261 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6262
6263 /* If we've delayed writing back the register value, flush it now. */
6264 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
6265
6266 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6267 if (!fConst)
6268 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
6269
6270 return off;
6271}
6272
6273
6274#undef IEM_MC_REF_EFLAGS /* should not be used. */
6275#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
6276 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
6277 off = iemNativeEmitRefEFlags<a_fEflOutput>(pReNative, off, a_pEFlags, a_fEflInput)
6278
6279/** Handles IEM_MC_REF_EFLAGS. */
6280template<uint32_t const a_fEflOutput>
6281DECL_INLINE_THROW(uint32_t)
6282iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput)
6283{
6284 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
6285 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6286
6287#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6288 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fEflInput);
6289 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
6290 if (pReNative->fSkippingEFlags)
6291 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitRefEFlags)\n",
6292 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~a_fEflOutput ));
6293 pReNative->fSkippingEFlags &= ~a_fEflOutput;
6294# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6295
6296 /* Updating the skipping according to the outputs is a little early, but
6297 we don't have any other hooks for references atm. */
6298 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6299 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6300 else if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) != 0)
6301 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(a_fEflOutput & X86_EFL_STATUS_BITS),
6302 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6303# endif
6304
6305 /* This ASSUMES that EFLAGS references are not taken before use. */
6306 IEMNATIVE_CLEAR_POSTPONED_EFLAGS(pReNative, a_fEflOutput);
6307
6308#endif
6309 RT_NOREF(fEflInput);
6310
6311 /* If we've delayed writing back the register value, flush it now. */
6312 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
6313
6314 /* If there is a shadow copy of guest EFLAGS, flush it now. */
6315 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
6316
6317 return off;
6318}
6319
6320
6321/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
6322 * different code from the threaded recompiler, maybe it would be helpful. For now
6323 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
6324#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
6325
6326
6327#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
6328 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
6329
6330#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
6331 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
6332
6333#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
6334 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
6335
6336#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
6337 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
6338
6339#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6340/* Just being paranoid here. */
6341# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
6342AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
6343AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
6344AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
6345AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
6346# endif
6347AssertCompileMemberOffset(X86XMMREG, au64, 0);
6348AssertCompileMemberOffset(X86XMMREG, au32, 0);
6349AssertCompileMemberOffset(X86XMMREG, ar64, 0);
6350AssertCompileMemberOffset(X86XMMREG, ar32, 0);
6351
6352# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
6353 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
6354# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
6355 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
6356# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
6357 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
6358# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
6359 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
6360#endif
6361
6362/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
6363DECL_INLINE_THROW(uint32_t)
6364iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
6365{
6366 Assert(iXReg < 16);
6367 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
6368 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6369
6370 /* If we've delayed writing back the register value, flush it now. */
6371 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
6372
6373#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6374 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6375 if (!fConst)
6376 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
6377#else
6378 RT_NOREF(fConst);
6379#endif
6380
6381 return off;
6382}
6383
6384
6385
6386/*********************************************************************************************************************************
6387* Effective Address Calculation *
6388*********************************************************************************************************************************/
6389#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
6390 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
6391
6392/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
6393 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
6394DECL_INLINE_THROW(uint32_t)
6395iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6396 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
6397{
6398 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6399
6400 /*
6401 * Handle the disp16 form with no registers first.
6402 *
6403 * Convert to an immediate value, as that'll delay the register allocation
6404 * and assignment till the memory access / call / whatever and we can use
6405 * a more appropriate register (or none at all).
6406 */
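    /* Illustrative example (added note): mod=0 together with r/m=6 is the register-less disp16
       form, e.g. 'mov ax, [1234h]', so the result is just the immediate displacement itself. */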
6407 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
6408 {
6409 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
6410 return off;
6411 }
6412
6413 /* Determine the displacement. */
6414 uint16_t u16EffAddr;
6415 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6416 {
6417 case 0: u16EffAddr = 0; break;
6418 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
6419 case 2: u16EffAddr = u16Disp; break;
6420 default: AssertFailedStmt(u16EffAddr = 0);
6421 }
6422
6423 /* Determine the registers involved. */
6424 uint8_t idxGstRegBase;
6425 uint8_t idxGstRegIndex;
6426 switch (bRm & X86_MODRM_RM_MASK)
6427 {
6428 case 0:
6429 idxGstRegBase = X86_GREG_xBX;
6430 idxGstRegIndex = X86_GREG_xSI;
6431 break;
6432 case 1:
6433 idxGstRegBase = X86_GREG_xBX;
6434 idxGstRegIndex = X86_GREG_xDI;
6435 break;
6436 case 2:
6437 idxGstRegBase = X86_GREG_xBP;
6438 idxGstRegIndex = X86_GREG_xSI;
6439 break;
6440 case 3:
6441 idxGstRegBase = X86_GREG_xBP;
6442 idxGstRegIndex = X86_GREG_xDI;
6443 break;
6444 case 4:
6445 idxGstRegBase = X86_GREG_xSI;
6446 idxGstRegIndex = UINT8_MAX;
6447 break;
6448 case 5:
6449 idxGstRegBase = X86_GREG_xDI;
6450 idxGstRegIndex = UINT8_MAX;
6451 break;
6452 case 6:
6453 idxGstRegBase = X86_GREG_xBP;
6454 idxGstRegIndex = UINT8_MAX;
6455 break;
6456#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
6457 default:
6458#endif
6459 case 7:
6460 idxGstRegBase = X86_GREG_xBX;
6461 idxGstRegIndex = UINT8_MAX;
6462 break;
6463 }
6464
6465 /*
6466 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
6467 */
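    /* Worked example (illustrative, not from the original): '[bp+di-2]' gives mod=1, r/m=3, so
       u16EffAddr = 0xfffe, base = guest BP, index = guest DI. On AMD64 this becomes a
       'lea ret32, [<base> + <index> + 0x0000fffe]' on the host registers shadowing BP/DI,
       followed by clearing bits 16 and up so only the wrapped 16-bit address remains. */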
6468 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6469 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6470 kIemNativeGstRegUse_ReadOnly);
6471 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
6472 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6473 kIemNativeGstRegUse_ReadOnly)
6474 : UINT8_MAX;
6475#ifdef RT_ARCH_AMD64
6476 if (idxRegIndex == UINT8_MAX)
6477 {
6478 if (u16EffAddr == 0)
6479 {
6480 /* movzx ret, base */
6481 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
6482 }
6483 else
6484 {
6485 /* lea ret32, [base64 + disp32] */
6486 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6487 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6488 if (idxRegRet >= 8 || idxRegBase >= 8)
6489 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6490 pbCodeBuf[off++] = 0x8d;
6491 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6492 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
6493 else
6494 {
6495 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
6496 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6497 }
6498 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6499 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6500 pbCodeBuf[off++] = 0;
6501 pbCodeBuf[off++] = 0;
6502 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6503
6504 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6505 }
6506 }
6507 else
6508 {
6509 /* lea ret32, [index64 + base64 (+ disp32)] */
6510 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6511 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6512 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6513 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6514 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6515 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6516 pbCodeBuf[off++] = 0x8d;
6517 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
6518 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6519 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
6520 if (bMod == X86_MOD_MEM4)
6521 {
6522 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6523 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6524 pbCodeBuf[off++] = 0;
6525 pbCodeBuf[off++] = 0;
6526 }
6527 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6528 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6529 }
6530
6531#elif defined(RT_ARCH_ARM64)
6532 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6533 if (u16EffAddr == 0)
6534 {
6535 if (idxRegIndex == UINT8_MAX)
6536 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
6537 else
6538 {
6539 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
6540 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6541 }
6542 }
6543 else
6544 {
6545 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
6546 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
6547 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
6548 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6549 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
6550 else
6551 {
6552 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
6553 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6554 }
6555 if (idxRegIndex != UINT8_MAX)
6556 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
6557 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6558 }
6559
6560#else
6561# error "port me"
6562#endif
6563
6564 if (idxRegIndex != UINT8_MAX)
6565 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6566 iemNativeRegFreeTmp(pReNative, idxRegBase);
6567 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6568 return off;
6569}
6570
6571
6572#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
6573 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
6574
6575/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
6576 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
6577DECL_INLINE_THROW(uint32_t)
6578iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6579 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
6580{
6581 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6582
6583 /*
6584 * Handle the disp32 form with no registers first.
6585 *
6586 * Convert to an immediate value, as that'll delay the register allocation
6587 * and assignment till the memory access / call / whatever and we can use
6588 * a more appropriate register (or none at all).
6589 */
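    /* Illustrative example (added note): mod=0 with r/m=5 (and no SIB byte) is the register-less
       disp32 form, e.g. 'mov eax, [12345678h]', so again only the immediate displacement is used. */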
6590 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6591 {
6592 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
6593 return off;
6594 }
6595
6596 /* Calculate the fixed displacement (more on this below under SIB.B=4 and SIB.B=5). */
6597 uint32_t u32EffAddr = 0;
6598 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6599 {
6600 case 0: break;
6601 case 1: u32EffAddr = (int8_t)u32Disp; break;
6602 case 2: u32EffAddr = u32Disp; break;
6603 default: AssertFailed();
6604 }
6605
6606 /* Get the register (or SIB) value. */
6607 uint8_t idxGstRegBase = UINT8_MAX;
6608 uint8_t idxGstRegIndex = UINT8_MAX;
6609 uint8_t cShiftIndex = 0;
6610 switch (bRm & X86_MODRM_RM_MASK)
6611 {
6612 case 0: idxGstRegBase = X86_GREG_xAX; break;
6613 case 1: idxGstRegBase = X86_GREG_xCX; break;
6614 case 2: idxGstRegBase = X86_GREG_xDX; break;
6615 case 3: idxGstRegBase = X86_GREG_xBX; break;
6616 case 4: /* SIB */
6617 {
6618 /* index with scaling. */
6619 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6620 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6621 {
6622 case 0: idxGstRegIndex = X86_GREG_xAX; break;
6623 case 1: idxGstRegIndex = X86_GREG_xCX; break;
6624 case 2: idxGstRegIndex = X86_GREG_xDX; break;
6625 case 3: idxGstRegIndex = X86_GREG_xBX; break;
6626 case 4: cShiftIndex = 0; /*no index*/ break;
6627 case 5: idxGstRegIndex = X86_GREG_xBP; break;
6628 case 6: idxGstRegIndex = X86_GREG_xSI; break;
6629 case 7: idxGstRegIndex = X86_GREG_xDI; break;
6630 }
6631
6632 /* base */
6633 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
6634 {
6635 case 0: idxGstRegBase = X86_GREG_xAX; break;
6636 case 1: idxGstRegBase = X86_GREG_xCX; break;
6637 case 2: idxGstRegBase = X86_GREG_xDX; break;
6638 case 3: idxGstRegBase = X86_GREG_xBX; break;
6639 case 4:
6640 idxGstRegBase = X86_GREG_xSP;
6641 u32EffAddr += uSibAndRspOffset >> 8;
6642 break;
6643 case 5:
6644 if ((bRm & X86_MODRM_MOD_MASK) != 0)
6645 idxGstRegBase = X86_GREG_xBP;
6646 else
6647 {
6648 Assert(u32EffAddr == 0);
6649 u32EffAddr = u32Disp;
6650 }
6651 break;
6652 case 6: idxGstRegBase = X86_GREG_xSI; break;
6653 case 7: idxGstRegBase = X86_GREG_xDI; break;
6654 }
6655 break;
6656 }
6657 case 5: idxGstRegBase = X86_GREG_xBP; break;
6658 case 6: idxGstRegBase = X86_GREG_xSI; break;
6659 case 7: idxGstRegBase = X86_GREG_xDI; break;
6660 }
6661
6662 /*
6663 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6664 * the start of the function.
6665 */
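    /* (Added note) This catches the SIB encoding of a plain absolute address, i.e. mod=0 with
       SIB.base=5 and SIB.index=4 (none), which is just a longer way of encoding '[disp32]'. */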
6666 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6667 {
6668 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
6669 return off;
6670 }
6671
6672 /*
6673 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6674 */
6675 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6676 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6677 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6678 kIemNativeGstRegUse_ReadOnly);
6679 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6680 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6681 kIemNativeGstRegUse_ReadOnly);
6682
6683 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6684 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6685 {
6686 idxRegBase = idxRegIndex;
6687 idxRegIndex = UINT8_MAX;
6688 }
6689
6690#ifdef RT_ARCH_AMD64
6691 if (idxRegIndex == UINT8_MAX)
6692 {
6693 if (u32EffAddr == 0)
6694 {
6695 /* mov ret, base */
6696 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6697 }
6698 else
6699 {
6700 /* lea ret32, [base64 + disp32] */
6701 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6702 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6703 if (idxRegRet >= 8 || idxRegBase >= 8)
6704 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6705 pbCodeBuf[off++] = 0x8d;
6706 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6707 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6708 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6709 else
6710 {
6711 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6712 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6713 }
6714 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6715 if (bMod == X86_MOD_MEM4)
6716 {
6717 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6718 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6719 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6720 }
6721 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6722 }
6723 }
6724 else
6725 {
6726 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6727 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6728 if (idxRegBase == UINT8_MAX)
6729 {
6730 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
6731 if (idxRegRet >= 8 || idxRegIndex >= 8)
6732 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6733 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6734 pbCodeBuf[off++] = 0x8d;
6735 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6736 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6737 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6738 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6739 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6740 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6741 }
6742 else
6743 {
6744 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6745 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6746 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6747 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6748 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6749 pbCodeBuf[off++] = 0x8d;
6750 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6751 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6752 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6753 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6754 if (bMod != X86_MOD_MEM0)
6755 {
6756 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6757 if (bMod == X86_MOD_MEM4)
6758 {
6759 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6760 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6761 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6762 }
6763 }
6764 }
6765 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6766 }
6767
6768#elif defined(RT_ARCH_ARM64)
6769 if (u32EffAddr == 0)
6770 {
6771 if (idxRegIndex == UINT8_MAX)
6772 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6773 else if (idxRegBase == UINT8_MAX)
6774 {
6775 if (cShiftIndex == 0)
6776 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
6777 else
6778 {
6779 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6780 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
6781 }
6782 }
6783 else
6784 {
6785 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6786 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6787 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6788 }
6789 }
6790 else
6791 {
6792 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
6793 {
6794 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6795 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
6796 }
6797 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
6798 {
6799 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6800 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6801 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
6802 }
6803 else
6804 {
6805 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
6806 if (idxRegBase != UINT8_MAX)
6807 {
6808 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6809 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6810 }
6811 }
6812 if (idxRegIndex != UINT8_MAX)
6813 {
6814 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6815 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6816 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6817 }
6818 }
6819
6820#else
6821# error "port me"
6822#endif
6823
6824 if (idxRegIndex != UINT8_MAX)
6825 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6826 if (idxRegBase != UINT8_MAX)
6827 iemNativeRegFreeTmp(pReNative, idxRegBase);
6828 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6829 return off;
6830}
6831
6832
6833#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6834 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6835 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6836
6837#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6838 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6839 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6840
6841#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6842 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6843 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
6844
6845/**
6846 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
6847 *
6848 * @returns New off.
6849 * @param pReNative The native recompiler state.
6850 * @param off The current offset into the code buffer.
6851 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
6852 * bit 4 to REX.X. The two bits are part of the
6853 * REG sub-field, which isn't needed in this
6854 * function.
6855 * @param uSibAndRspOffset Two parts:
6856 * - The first 8 bits make up the SIB byte.
6857 * - The next 8 bits are the fixed RSP/ESP offset
6858 * in case of a pop [xSP].
6859 * @param u32Disp The displacement byte/word/dword, if any.
6860 * @param cbInstr The size of the fully decoded instruction. Used
6861 * for RIP relative addressing.
6862 * @param idxVarRet The result variable number.
6863 * @param f64Bit Whether to use a 64-bit or 32-bit address size
6864 * when calculating the address.
6865 *
6866 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
6867 */
6868DECL_INLINE_THROW(uint32_t)
6869iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
6870 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
6871{
6872 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6873
6874 /*
6875 * Special case the rip + disp32 form first.
6876 */
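    /* Informative example (added): for 'mov rax, [rip+disp32]' the effective address is the
       address of the *next* instruction plus disp32, which is why cbInstr is added to the
       displacement below. */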
6877 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6878 {
6879 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6880 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6881 kIemNativeGstRegUse_ReadOnly);
6882 if (f64Bit)
6883 {
6884#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6885 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr + (int64_t)pReNative->Core.offPc;
6886#else
6887 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
6888#endif
6889#ifdef RT_ARCH_AMD64
6890 if ((int32_t)offFinalDisp == offFinalDisp)
6891 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
6892 else
6893 {
6894 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
6895 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
6896 }
6897#else
6898 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6899#endif
6900 }
6901 else
6902 {
6903# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6904 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr + (int32_t)pReNative->Core.offPc;
6905# else
6906 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr;
6907# endif
6908 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6909 }
6910 iemNativeRegFreeTmp(pReNative, idxRegPc);
6911 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6912 return off;
6913 }
6914
6915 /* Calculate the fixed displacement (more on this below under SIB.B=4 and SIB.B=5). */
6916 int64_t i64EffAddr = 0;
6917 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6918 {
6919 case 0: break;
6920 case 1: i64EffAddr = (int8_t)u32Disp; break;
6921 case 2: i64EffAddr = (int32_t)u32Disp; break;
6922 default: AssertFailed();
6923 }
6924
6925 /* Get the register (or SIB) value. */
6926 uint8_t idxGstRegBase = UINT8_MAX;
6927 uint8_t idxGstRegIndex = UINT8_MAX;
6928 uint8_t cShiftIndex = 0;
6929 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6930 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6931 else /* SIB: */
6932 {
6933 /* index with scaling. */
6934 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6935 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6936 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6937 if (idxGstRegIndex == 4)
6938 {
6939 /* no index */
6940 cShiftIndex = 0;
6941 idxGstRegIndex = UINT8_MAX;
6942 }
6943
6944 /* base */
6945 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6946 if (idxGstRegBase == 4)
6947 {
6948 /* pop [rsp] hack */
6949 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6950 }
6951 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6952 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6953 {
6954 /* mod=0 and base=5 -> disp32, no base reg. */
6955 Assert(i64EffAddr == 0);
6956 i64EffAddr = (int32_t)u32Disp;
6957 idxGstRegBase = UINT8_MAX;
6958 }
6959 }
6960
6961 /*
6962 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6963 * the start of the function.
6964 */
6965 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6966 {
6967 if (f64Bit)
6968 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6969 else
6970 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6971 return off;
6972 }
6973
6974 /*
6975 * Now emit code that calculates:
6976 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6977 * or if !f64Bit:
6978 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6979 */
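    /* Worked example (illustrative, not from the original): 'mov rax, [r13+r14*8+10h]' yields
       base=r13, index=r14, cShiftIndex=3 and i64EffAddr=0x10; on AMD64 that typically folds into a
       single 'lea' with a SIB byte and the appropriate REX bits, using the host registers that
       shadow the guest base and index registers. */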
6980 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6981 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6982 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6983 kIemNativeGstRegUse_ReadOnly);
6984 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6985 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6986 kIemNativeGstRegUse_ReadOnly);
6987
6988 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6989 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6990 {
6991 idxRegBase = idxRegIndex;
6992 idxRegIndex = UINT8_MAX;
6993 }
6994
6995#ifdef RT_ARCH_AMD64
6996 uint8_t bFinalAdj;
6997 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6998 bFinalAdj = 0; /* likely */
6999 else
7000 {
7001 /* pop [rsp] with a problematic disp32 value. Split out the
7002 RSP offset and add it separately afterwards (bFinalAdj). */
7003 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
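        /* (Background note, added): the Intel SDM specifies that for 'pop [rsp+disp]' the effective
           address is computed with the already incremented RSP, hence the extra RSP offset carried
           in bits 8+ of uSibAndRspOffset, which is split out here as bFinalAdj. */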
7004 Assert(idxGstRegBase == X86_GREG_xSP);
7005 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
7006 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
7007 Assert(bFinalAdj != 0);
7008 i64EffAddr -= bFinalAdj;
7009 Assert((int32_t)i64EffAddr == i64EffAddr);
7010 }
7011 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
7012//pReNative->pInstrBuf[off++] = 0xcc;
7013
7014 if (idxRegIndex == UINT8_MAX)
7015 {
7016 if (u32EffAddr == 0)
7017 {
7018 /* mov ret, base */
7019 if (f64Bit)
7020 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
7021 else
7022 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
7023 }
7024 else
7025 {
7026 /* lea ret, [base + disp32] */
7027 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
7028 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
7029 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
7030 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7031 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
7032 | (f64Bit ? X86_OP_REX_W : 0);
7033 pbCodeBuf[off++] = 0x8d;
7034 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7035 if (idxRegBase != X86_GREG_x12 /*SIB*/)
7036 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
7037 else
7038 {
7039 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7040 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
7041 }
7042 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7043 if (bMod == X86_MOD_MEM4)
7044 {
7045 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7046 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7047 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7048 }
7049 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7050 }
7051 }
7052 else
7053 {
7054 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
7055 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
7056 if (idxRegBase == UINT8_MAX)
7057 {
7058 /* lea ret, [(index64 << cShiftIndex) + disp32] */
7059 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
7060 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7061 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
7062 | (f64Bit ? X86_OP_REX_W : 0);
7063 pbCodeBuf[off++] = 0x8d;
7064 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
7065 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
7066 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7067 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7068 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7069 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7070 }
7071 else
7072 {
7073 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
7074 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
7075 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7076 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
7077 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
7078 | (f64Bit ? X86_OP_REX_W : 0);
7079 pbCodeBuf[off++] = 0x8d;
7080 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
7081 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7082 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7083 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
7084 if (bMod != X86_MOD_MEM0)
7085 {
7086 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7087 if (bMod == X86_MOD_MEM4)
7088 {
7089 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7090 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7091 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7092 }
7093 }
7094 }
7095 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7096 }
7097
7098 if (!bFinalAdj)
7099 { /* likely */ }
7100 else
7101 {
7102 Assert(f64Bit);
7103 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
7104 }
7105
7106#elif defined(RT_ARCH_ARM64)
7107 if (i64EffAddr == 0)
7108 {
7109 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7110 if (idxRegIndex == UINT8_MAX)
7111 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
7112 else if (idxRegBase != UINT8_MAX)
7113 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
7114 f64Bit, false /*fSetFlags*/, cShiftIndex);
7115 else
7116 {
7117 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
7118 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
7119 }
7120 }
7121 else
7122 {
7123 if (f64Bit)
7124 { /* likely */ }
7125 else
7126 i64EffAddr = (int32_t)i64EffAddr;
7127
7128 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
7129 {
7130 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7131 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
7132 }
7133 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
7134 {
7135 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7136 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
7137 }
7138 else
7139 {
7140 if (f64Bit)
7141 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
7142 else
7143 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
7144 if (idxRegBase != UINT8_MAX)
7145 {
7146 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7147 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
7148 }
7149 }
7150 if (idxRegIndex != UINT8_MAX)
7151 {
7152 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7153 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
7154 f64Bit, false /*fSetFlags*/, cShiftIndex);
7155 }
7156 }
7157
7158#else
7159# error "port me"
7160#endif
7161
7162 if (idxRegIndex != UINT8_MAX)
7163 iemNativeRegFreeTmp(pReNative, idxRegIndex);
7164 if (idxRegBase != UINT8_MAX)
7165 iemNativeRegFreeTmp(pReNative, idxRegBase);
7166 iemNativeVarRegisterRelease(pReNative, idxVarRet);
7167 return off;
7168}
7169
7170
7171/*********************************************************************************************************************************
7172* Memory fetches and stores common *
7173*********************************************************************************************************************************/
7174
7175typedef enum IEMNATIVEMITMEMOP
7176{
7177 kIemNativeEmitMemOp_Store = 0,
7178 kIemNativeEmitMemOp_Fetch,
7179 kIemNativeEmitMemOp_Fetch_Zx_U16,
7180 kIemNativeEmitMemOp_Fetch_Zx_U32,
7181 kIemNativeEmitMemOp_Fetch_Zx_U64,
7182 kIemNativeEmitMemOp_Fetch_Sx_U16,
7183 kIemNativeEmitMemOp_Fetch_Sx_U32,
7184 kIemNativeEmitMemOp_Fetch_Sx_U64
7185} IEMNATIVEMITMEMOP;
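
/* Informal summary (added, not in the original): the _Zx_ members mean "fetch and zero-extend to
 * the given width" (as for 'movzx'), while the _Sx_ members sign-extend (as for 'movsx');
 * kIemNativeEmitMemOp_Store and kIemNativeEmitMemOp_Fetch handle same-width stores and loads. */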
7186
7187/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
7188 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
7189 * (with iSegReg = UINT8_MAX). */
7190/** @todo Pass enmOp, cbMem, fAlignMaskAndCtl and an iSegReg == UINT8_MAX
7191 * indicator as template parameters. */
7192DECL_INLINE_THROW(uint32_t)
7193iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
7194 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAlignMaskAndCtl, IEMNATIVEMITMEMOP enmOp,
7195 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
7196{
7197 /*
7198 * Assert sanity.
7199 */
7200 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7201 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7202 Assert( enmOp != kIemNativeEmitMemOp_Store
7203 || pVarValue->enmKind == kIemNativeVarKind_Immediate
7204 || pVarValue->enmKind == kIemNativeVarKind_Stack);
7205 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7206 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7207 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7208 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7209 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7210 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
7211#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7212 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
7213 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
7214#else
7215 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
7216#endif
7217 Assert(!(fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
7218 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7219#ifdef VBOX_STRICT
7220 if (iSegReg == UINT8_MAX)
7221 {
7222 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7223 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7224 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7225 switch (cbMem)
7226 {
7227 case 1:
7228 Assert( pfnFunction
7229 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
7230 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7231 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7232 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7233 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7234 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
7235 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
7236 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
7237 : UINT64_C(0xc000b000a0009000) ));
7238 Assert(!fAlignMaskAndCtl);
7239 break;
7240 case 2:
7241 Assert( pfnFunction
7242 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
7243 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7244 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7245 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7246 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
7247 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
7248 : UINT64_C(0xc000b000a0009000) ));
7249 Assert(fAlignMaskAndCtl <= 1);
7250 break;
7251 case 4:
7252 Assert( pfnFunction
7253 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
7254 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7255 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7256 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
7257 : UINT64_C(0xc000b000a0009000) ));
7258 Assert(fAlignMaskAndCtl <= 3);
7259 break;
7260 case 8:
7261 Assert( pfnFunction
7262 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
7263 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
7264 : UINT64_C(0xc000b000a0009000) ));
7265 Assert(fAlignMaskAndCtl <= 7);
7266 break;
7267#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7268 case sizeof(RTUINT128U):
7269 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7270 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
7271 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7272 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
7273 || ( enmOp == kIemNativeEmitMemOp_Store
7274 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7275 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
7276 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7277 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7278 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
7279 : fAlignMaskAndCtl <= 15);
7280 break;
7281 case sizeof(RTUINT256U):
7282 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7283 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
7284 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
7285 || ( enmOp == kIemNativeEmitMemOp_Store
7286 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
7287 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
7288 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
7289 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
7290 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
7291 : fAlignMaskAndCtl <= 31);
7292 break;
7293#endif
7294 }
7295 }
7296 else
7297 {
7298 Assert(iSegReg < 6);
7299 switch (cbMem)
7300 {
7301 case 1:
7302 Assert( pfnFunction
7303 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
7304 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
7305 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7306 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7307 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7308 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
7309 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
7310 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
7311 : UINT64_C(0xc000b000a0009000) ));
7312 Assert(!fAlignMaskAndCtl);
7313 break;
7314 case 2:
7315 Assert( pfnFunction
7316 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
7317 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
7318 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7319 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7320 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
7321 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
7322 : UINT64_C(0xc000b000a0009000) ));
7323 Assert(fAlignMaskAndCtl <= 1);
7324 break;
7325 case 4:
7326 Assert( pfnFunction
7327 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
7328 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
7329 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
7330 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
7331 : UINT64_C(0xc000b000a0009000) ));
7332 Assert(fAlignMaskAndCtl <= 3);
7333 break;
7334 case 8:
7335 Assert( pfnFunction
7336 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
7337 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
7338 : UINT64_C(0xc000b000a0009000) ));
7339 Assert(fAlignMaskAndCtl <= 7);
7340 break;
7341#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7342 case sizeof(RTUINT128U):
7343 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7344 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
7345 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7346 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
7347 || ( enmOp == kIemNativeEmitMemOp_Store
7348 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7349 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
7350 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7351 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7352 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
7353 : fAlignMaskAndCtl <= 15);
7354 break;
7355 case sizeof(RTUINT256U):
7356 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7357 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
7358 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
7359 || ( enmOp == kIemNativeEmitMemOp_Store
7360 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
7361 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
7362 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
7363 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
7364 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
7365 : fAlignMaskAndCtl <= 31);
7366 break;
7367#endif
7368 }
7369 }
7370#endif
7371
7372#ifdef VBOX_STRICT
7373 /*
7374 * Check that the fExec flags we've got make sense.
7375 */
7376 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7377#endif
7378
7379 /*
7380 * To keep things simple we have to commit any pending writes first as we
7381 * may end up making calls.
7382 */
7383 /** @todo we could postpone this till we make the call and reload the
7384 * registers after returning from the call. Not sure if that's sensible or
7385 * not, though. */
7386#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7387 off = iemNativeRegFlushPendingWrites(pReNative, off);
7388#else
7389 /* The program counter is treated differently for now. */
7390 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
7391#endif
7392
7393#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7394 /*
7395 * Move/spill/flush stuff out of call-volatile registers.
7396 * This is the easy way out. We could contain this to the tlb-miss branch
7397 * by saving and restoring active stuff here.
7398 */
7399 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7400#endif
7401
7402 /*
7403 * Define labels and allocate the result register (trying for the return
7404 * register if we can).
7405 */
7406 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7407#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7408 uint8_t idxRegValueFetch = UINT8_MAX;
7409
7410 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7411 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7412 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
7413 else
7414 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7415 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7416 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7417 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7418#else
7419 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7420 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7421 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7422 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7423#endif
7424 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
7425
7426#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7427 uint8_t idxRegValueStore = UINT8_MAX;
7428
7429 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7430 idxRegValueStore = !TlbState.fSkip
7431 && enmOp == kIemNativeEmitMemOp_Store
7432 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7433 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7434 : UINT8_MAX;
7435 else
7436 idxRegValueStore = !TlbState.fSkip
7437 && enmOp == kIemNativeEmitMemOp_Store
7438 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7439 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7440 : UINT8_MAX;
7441
7442#else
7443 uint8_t const idxRegValueStore = !TlbState.fSkip
7444 && enmOp == kIemNativeEmitMemOp_Store
7445 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7446 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7447 : UINT8_MAX;
7448#endif
7449 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7450 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7451 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7452 : UINT32_MAX;
7453
7454 /*
7455 * Jump to the TLB lookup code.
7456 */
7457 if (!TlbState.fSkip)
7458 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7459
7460 /*
7461 * TlbMiss:
7462 *
7463 * Call helper to do the fetching.
7464 * We flush all guest register shadow copies here.
7465 */
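    /* (Informal overview, added): the miss path below loads GCPtrMem into ARG1 and, for stores, the
       value into ARG2/ARG3 (plus the segment register index into ARG2 when not using a flat address
       space) before calling the pfnFunction helper; the TLB lookup code jumped to above resolves the
       address directly via the data TLB on the fast path. */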
7466 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7467
7468#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7469 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7470#else
7471 RT_NOREF(idxInstr);
7472#endif
7473
7474#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7475 if (pReNative->Core.offPc)
7476 {
7477 /*
7478 * Update the program counter but restore it at the end of the TlbMiss branch.
7479 * This should allow delaying more program counter updates for the TlbLookup and hit paths
7480 * which are hopefully much more frequent, reducing the amount of memory accesses.
7481         * which are hopefully much more frequent, reducing the number of memory accesses.
7482 /* Allocate a temporary PC register. */
7483/** @todo r=bird: This would technically need to be done up front as it's a register allocation. */
7484 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7485 kIemNativeGstRegUse_ForUpdate);
7486
7487 /* Perform the addition and store the result. */
7488 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7489 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7490# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
7491 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
7492# endif
7493
7494 /* Free and flush the PC register. */
7495 iemNativeRegFreeTmp(pReNative, idxPcReg);
7496 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7497 }
7498#endif
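    /* While delayed PC updating is active the guest RIP in CPUMCTX lags behind by
       Core.offPc bytes; the add + store above makes it exact for the helper call and
       the matching subtract after the call restores the delayed state again. */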
7499
7500#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7501 /* Save variables in volatile registers. */
7502 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7503 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
7504 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
7505 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7506#endif
7507
7508 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
7509 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
7510#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7511 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7512 {
7513 /*
7514 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
7515 *
7516         * Note! A host register was already assigned to the variable for the TlbLookup case above.
7517         *       It must not be freed here, or the value loaded into that register will not be synced
7518         *       back further down the road, because the variable would no longer know it had a register assigned.
7519 *
7520 * Note! For loads it is not required to sync what is in the assigned register with the stack slot
7521 * as it will be overwritten anyway.
7522 */
7523 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7524 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
7525 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
7526 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7527 }
7528 else
7529#endif
7530 if (enmOp == kIemNativeEmitMemOp_Store)
7531 {
7532 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7533            off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*offAddend*/,
7534#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7535 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7536#else
7537 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7538 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7539#endif
7540 }
7541
7542 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
7543    off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*offAddend*/,
7544#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7545 fVolGregMask);
7546#else
7547 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
7548#endif
7549
7550 if (iSegReg != UINT8_MAX)
7551 {
7552 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
7553 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7554 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
7555 }
7556
7557#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
7558 /* Do delayed EFLAGS calculations. */
7559 if (enmOp == kIemNativeEmitMemOp_Store || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7560 {
7561 if (iSegReg == UINT8_MAX)
7562 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7563 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
7564 fHstRegsNotToSave);
7565 else
7566 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7567 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
7568 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG)>(pReNative, off, &TlbState,
7569 fHstRegsNotToSave);
7570 }
7571 else if (iSegReg == UINT8_MAX)
7572 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState,
7573 fHstRegsNotToSave);
7574 else
7575 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7576 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
7577 fHstRegsNotToSave);
7578#endif
7579
7580 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7581 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7582
7583 /* Done setting up parameters, make the call. */
7584 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
7585
7586 /*
7587 * Put the result in the right register if this is a fetch.
7588 */
7589 if (enmOp != kIemNativeEmitMemOp_Store)
7590 {
7591#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7592 if ( cbMem == sizeof(RTUINT128U)
7593 || cbMem == sizeof(RTUINT256U))
7594 {
7595 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
7596
7597 /* Sync the value on the stack with the host register assigned to the variable. */
7598 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
7599 }
7600 else
7601#endif
7602 {
7603 Assert(idxRegValueFetch == pVarValue->idxReg);
7604 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
7605 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
7606 }
7607 }
7608
7609#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7610 /* Restore variables and guest shadow registers to volatile registers. */
7611 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7612 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7613#endif
7614
7615#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7616 if (pReNative->Core.offPc)
7617 {
7618 /*
7619 * Time to restore the program counter to its original value.
7620 */
7621 /* Allocate a temporary PC register. */
7622 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7623 kIemNativeGstRegUse_ForUpdate);
7624
7625 /* Restore the original value. */
7626 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7627 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7628
7629 /* Free and flush the PC register. */
7630 iemNativeRegFreeTmp(pReNative, idxPcReg);
7631 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7632 }
7633#endif
7634
7635#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7636 if (!TlbState.fSkip)
7637 {
7638 /* end of TlbMiss - Jump to the done label. */
7639 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7640 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7641
7642 /*
7643 * TlbLookup:
7644 */
7645 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl,
7646 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
7647 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
7648
7649 /*
7650 * Emit code to do the actual storing / fetching.
7651 */
7652 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7653# ifdef IEM_WITH_TLB_STATISTICS
7654 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7655 enmOp == kIemNativeEmitMemOp_Store
7656                                                  ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
7657                                                  : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
7658# endif
7659 switch (enmOp)
7660 {
7661 case kIemNativeEmitMemOp_Store:
7662 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
7663 {
7664 switch (cbMem)
7665 {
7666 case 1:
7667 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7668 break;
7669 case 2:
7670 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7671 break;
7672 case 4:
7673 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7674 break;
7675 case 8:
7676 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7677 break;
7678#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7679 case sizeof(RTUINT128U):
7680 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7681 break;
7682 case sizeof(RTUINT256U):
7683 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7684 break;
7685#endif
7686 default:
7687 AssertFailed();
7688 }
7689 }
7690 else
7691 {
7692 switch (cbMem)
7693 {
7694 case 1:
7695 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
7696 idxRegMemResult, TlbState.idxReg1);
7697 break;
7698 case 2:
7699 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7700 idxRegMemResult, TlbState.idxReg1);
7701 break;
7702 case 4:
7703 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7704 idxRegMemResult, TlbState.idxReg1);
7705 break;
7706 case 8:
7707 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
7708 idxRegMemResult, TlbState.idxReg1);
7709 break;
7710 default:
7711 AssertFailed();
7712 }
7713 }
7714 break;
7715
7716 case kIemNativeEmitMemOp_Fetch:
7717 case kIemNativeEmitMemOp_Fetch_Zx_U16:
7718 case kIemNativeEmitMemOp_Fetch_Zx_U32:
7719 case kIemNativeEmitMemOp_Fetch_Zx_U64:
7720 switch (cbMem)
7721 {
7722 case 1:
7723 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7724 break;
7725 case 2:
7726 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7727 break;
7728 case 4:
7729 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7730 break;
7731 case 8:
7732 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7733 break;
7734#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7735 case sizeof(RTUINT128U):
7736 /*
7737 * No need to sync back the register with the stack, this is done by the generic variable handling
7738 * code if there is a register assigned to a variable and the stack must be accessed.
7739 */
7740 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7741 break;
7742 case sizeof(RTUINT256U):
7743 /*
7744 * No need to sync back the register with the stack, this is done by the generic variable handling
7745 * code if there is a register assigned to a variable and the stack must be accessed.
7746 */
7747 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7748 break;
7749#endif
7750 default:
7751 AssertFailed();
7752 }
7753 break;
7754
7755 case kIemNativeEmitMemOp_Fetch_Sx_U16:
7756 Assert(cbMem == 1);
7757 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7758 break;
7759
7760 case kIemNativeEmitMemOp_Fetch_Sx_U32:
7761 Assert(cbMem == 1 || cbMem == 2);
7762 if (cbMem == 1)
7763 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7764 else
7765 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7766 break;
7767
7768 case kIemNativeEmitMemOp_Fetch_Sx_U64:
7769 switch (cbMem)
7770 {
7771 case 1:
7772 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7773 break;
7774 case 2:
7775 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7776 break;
7777 case 4:
7778 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7779 break;
7780 default:
7781 AssertFailed();
7782 }
7783 break;
7784
7785 default:
7786 AssertFailed();
7787 }
7788
7789 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7790
7791 /*
7792 * TlbDone:
7793 */
7794 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7795
7796 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7797
7798# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7799 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7800 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7801# endif
7802 }
7803#else
7804 RT_NOREF(fAlignMaskAndCtl, idxLabelTlbMiss);
7805#endif
7806
7807 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
7808 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7809 return off;
7810}
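
/*
 * Rough sketch (for orientation only) of the code layout the emitter above produces
 * when the inlined TLB lookup is not skipped:
 * @code
 *          jump to TlbLookup
 *      TlbMiss:
 *          save volatile regs, marshal pVCpu / GCPtrMem / (iSegReg, value) into call regs
 *          call pfnFunction                        ; e.g. iemNativeHlpMemFetchDataU32
 *          move the result into the variable's register, restore volatile regs
 *          jump to TlbDone
 *      TlbLookup:
 *          inlined data TLB probe, branching back to TlbMiss on a miss
 *          on a hit: direct load/store through the host address in idxRegMemResult
 *      TlbDone:
 * @endcode
 */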
7811
7812
7813
7814/*********************************************************************************************************************************
7815* Memory fetches (IEM_MEM_FETCH_XXX). *
7816*********************************************************************************************************************************/
7817
7818/* 8-bit segmented: */
7819#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
7820 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
7821 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7822 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7823
7824#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7825 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7826 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7827 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7828
7829#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7830 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7831 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7832 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7833
7834#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7835 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7836 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7837 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7838
7839#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7840 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7841 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7842 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7843
7844#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7845 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7846 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7847 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7848
7849#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7850 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7851 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7852 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
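
/* The _ZX_/_SX_ variants above only differ in how the fetched byte is widened into the
 * destination register; in plain C terms (illustrative only):
 *      u64Dst = (uint64_t)u8Src;                       // Fetch_Zx_U64
 *      u64Dst = (uint64_t)(int64_t)(int8_t)u8Src;      // Fetch_Sx_U64
 */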
7853
7854/* 16-bit segmented: */
7855#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7856 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7857 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7858 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7859
7860#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7861 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7862 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7863 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7864
7865#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7866 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7867 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7868 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7869
7870#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7871 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7872 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7873 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7874
7875#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7876 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7877 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7878 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7879
7880#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7881 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7882 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7883 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7884
7885
7886/* 32-bit segmented: */
7887#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7888 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7889 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7890 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7891
7892#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7893 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7894 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7895 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7896
7897#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7898 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7899 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7900 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7901
7902#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7903 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7904 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7905 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7906
7907#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
7908 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
7909 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7910 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7911
7912#define IEM_MC_FETCH_MEM_I16_DISP(a_i16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7913 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
7914 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7915 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7916
7917#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
7918 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7919 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7920 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7921
7922#define IEM_MC_FETCH_MEM_I32_DISP(a_i32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7923 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7924 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7925 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7926
7927#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
7928 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
7929 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7930 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7931
7932AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
7933#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
7934 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
7935 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7936 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7937
7938
7939/* 64-bit segmented: */
7940#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7941 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7942 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7943 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7944
7945AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
7946#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
7947 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
7948 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7949 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
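
/* Note: the R32/R64 fetches reuse the U32/U64 helpers; the AssertCompileSize checks
 * above guarantee that RTFLOAT32U/RTFLOAT64U match the integer sizes, so only the
 * raw bits are copied and no floating-point conversion takes place. */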
7950
7951
7952/* 8-bit flat: */
7953#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7954 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
7955 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7956 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7957
7958#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7959 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7960 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7961 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7962
7963#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7964 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7965 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7966 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7967
7968#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7969 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7970 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7971 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7972
7973#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7974 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7975 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7976 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7977
7978#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7979 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7980 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7981 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7982
7983#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7984 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7985 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7986 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7987
7988
7989/* 16-bit flat: */
7990#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7991 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7992 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7993 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7994
7995#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7996 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7997 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7998 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7999
8000#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
8001 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
8002 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
8003 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
8004
8005#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
8006 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
8007 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
8008 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
8009
8010#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
8011 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
8012 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
8013 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
8014
8015#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
8016 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
8017 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
8018 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
8019
8020/* 32-bit flat: */
8021#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
8022 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
8023 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
8024 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
8025
8026#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
8027 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
8028 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
8029 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
8030
8031#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
8032 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
8033 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
8034 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
8035
8036#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
8037 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
8038 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
8039 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
8040
8041#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
8042 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
8043 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
8044 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
8045
8046#define IEM_MC_FETCH_MEM_FLAT_I16_DISP(a_i16Dst, a_GCPtrMem, a_offDisp) \
8047 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
8048 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
8049 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
8050
8051#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
8052 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
8053 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
8054 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
8055
8056#define IEM_MC_FETCH_MEM_FLAT_I32_DISP(a_i32Dst, a_GCPtrMem, a_offDisp) \
8057 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
8058 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
8059 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
8060
8061#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
8062 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
8063 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
8064 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
8065
8066#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
8067 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
8068 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
8069 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
8070
8071
8072/* 64-bit flat: */
8073#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
8074 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
8075 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
8076 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
8077
8078#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
8079 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
8080 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
8081 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
8082
8083#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8084/* 128-bit segmented: */
8085#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
8086 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
8087 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
8088 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
8089
8090#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
8091 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8092 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8093 kIemNativeEmitMemOp_Fetch, \
8094 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
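
/* Note: the fAlignMaskAndCtl argument packs the byte alignment mask (sizeof - 1) into
 * the low bits and ORs in IEM_MEMMAP_F_ALIGN_GP / IEM_MEMMAP_F_ALIGN_SSE, which (going
 * by the flag names) make the TLB lookup / iemMemMap code raise #GP(0) for misaligned
 * SSE-style accesses rather than #AC. */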
8095
8096AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
8097#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
8098 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, sizeof(X86XMMREG), \
8099 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8100 kIemNativeEmitMemOp_Fetch, \
8101 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
8102
8103#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
8104 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
8105 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
8106 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
8107
8108#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
8109 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
8110 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
8111 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
8112
8113
8114/* 128-bit flat: */
8115#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
8116 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
8117 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
8118 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
8119
8120#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
8121 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8122 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8123 kIemNativeEmitMemOp_Fetch, \
8124 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
8125
8126#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
8127 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, sizeof(X86XMMREG), \
8128 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8129 kIemNativeEmitMemOp_Fetch, \
8130 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
8131
8132#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
8133 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
8134 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
8135 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
8136
8137#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
8138 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
8139 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
8140 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
8141
8142/* 256-bit segmented: */
8143#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
8144 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
8145 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
8146 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8147
8148#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
8149 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
8150 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
8151 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8152
8153#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
8154 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
8155 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
8156 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8157
8158#define IEM_MC_FETCH_MEM_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
8159 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
8160 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
8161 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8162
8163
8164/* 256-bit flat: */
8165#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
8166 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
8167 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
8168 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8169
8170#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
8171 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
8172 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
8173 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8174
8175#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
8176 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
8177 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
8178 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8179
8180#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
8181 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, \
8182 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
8183 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8184
8185#endif
8186
8187
8188/*********************************************************************************************************************************
8189* Memory stores (IEM_MEM_STORE_XXX). *
8190*********************************************************************************************************************************/
8191
8192#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
8193 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
8194 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
8195 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8196
8197#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
8198 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
8199 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
8200 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8201
8202#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
8203 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
8204 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
8205 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8206
8207#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
8208 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
8209 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
8210 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8211
8212
8213#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
8214 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
8215 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
8216 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8217
8218#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
8219 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
8220 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
8221 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8222
8223#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
8224 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
8225 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
8226 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8227
8228#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
8229 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
8230 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
8231 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8232
8233
8234#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
8235 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8236 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8237
8238#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
8239 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8240 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8241
8242#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
8243 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8244 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8245
8246#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
8247 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8248 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8249
8250
8251#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
8252 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8253 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8254
8255#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
8256 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8257 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8258
8259#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
8260 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8261 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8262
8263#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
8264 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8265 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8266
8267/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
8268 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
8269DECL_INLINE_THROW(uint32_t)
8270iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
8271 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
8272{
8273 /*
8274 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
8275 * to do the grunt work.
8276 */
8277 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
8278 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
8279 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
8280 pfnFunction, idxInstr);
8281 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
8282 return off;
8283}
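
/* Example (illustrative): IEM_MC_STORE_MEM_U32_CONST above thus becomes a regular
 * kIemNativeEmitMemOp_Store of a temporary immediate variable, sharing all of the
 * TLB lookup / TlbMiss machinery of the generic emitter and letting the TlbLookup
 * path use the StoreImm*ByGprEx shortcuts. */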
8284
8285
8286#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8287# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
8288 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8289 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8290 kIemNativeEmitMemOp_Store, \
8291 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
8292
8293# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
8294 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
8295 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
8296 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
8297
8298# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
8299 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
8300 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
8301 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
8302
8303# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
8304 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
8305 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8306 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8307
8308
8309# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
8310 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8311 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8312 kIemNativeEmitMemOp_Store, \
8313 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
8314
8315# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
8316 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
8317 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
8318 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
8319
8320# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
8321 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
8322 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
8323 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
8324
8325# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
8326 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
8327 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8328 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8329#endif
8330
8331
8332
8333/*********************************************************************************************************************************
8334* Stack Accesses. *
8335*********************************************************************************************************************************/
8336/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
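/* Illustrative decoding: RT_MAKE_U32_FROM_U8(a, b, c, d) packs the bytes as
 * a | (b << 8) | (c << 16) | (d << 24), so the emitter below recovers cBitsVar via
 * RT_BYTE1(), cBitsFlat via RT_BYTE2() and fSReg via RT_BYTE3(); e.g.
 * RT_MAKE_U32_FROM_U8(64, 64, 0, 0) describes a 64-bit push on a flat 64-bit stack. */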
8337#define IEM_MC_PUSH_U16(a_u16Value) \
8338 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
8339 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
8340#define IEM_MC_PUSH_U32(a_u32Value) \
8341 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
8342 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
8343#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
8344 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
8345 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
8346#define IEM_MC_PUSH_U64(a_u64Value) \
8347 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
8348 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
8349
8350#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
8351 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
8352 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8353#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
8354 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
8355 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
8356#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
8357 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
8358 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
8359
8360#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
8361 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
8362 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8363#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
8364 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
8365 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
8366
8367
8368/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
8369DECL_INLINE_THROW(uint32_t)
8370iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
8371 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
8372{
8373 /*
8374 * Assert sanity.
8375 */
8376 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
8377 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
8378#ifdef VBOX_STRICT
8379 if (RT_BYTE2(cBitsVarAndFlat) != 0)
8380 {
8381 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8382 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8383 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8384 Assert( pfnFunction
8385 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8386 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
8387 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
8388 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8389 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
8390 : UINT64_C(0xc000b000a0009000) ));
8391 }
8392 else
8393 Assert( pfnFunction
8394 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
8395 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
8396 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
8397 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
8398 : UINT64_C(0xc000b000a0009000) ));
8399#endif
8400
8401#ifdef VBOX_STRICT
8402 /*
8403 * Check that the fExec flags we've got make sense.
8404 */
8405 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8406#endif
8407
8408 /*
8409 * To keep things simple we have to commit any pending writes first as we
8410 * may end up making calls.
8411 */
8412 /** @todo we could postpone this till we make the call and reload the
8413 * registers after returning from the call. Not sure if that's sensible or
8414 * not, though. */
8415 off = iemNativeRegFlushPendingWrites(pReNative, off);
8416
8417 /*
8418 * First we calculate the new RSP and the effective stack pointer value.
8419 * For 64-bit mode and flat 32-bit these two are the same.
8420     * (Code structure is very similar to that of POP)
8421 */
8422 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
8423 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
8424 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
8425 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
8426 ? cbMem : sizeof(uint16_t);
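    /* Note: outside 16-bit mode an Intel CPU only writes 16 bits for a wide segment
       register push, so cbMemAccess shrinks to 2 while cbMem still drives the RSP
       adjustment; in 16-bit mode the full width is written and the fIsIntelSeg special
       case in the TlbLookup store path below reproduces the EFLAGS-in-the-high-word
       quirk. */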
8427 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
8428 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8429 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8430 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8431 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8432 if (cBitsFlat != 0)
8433 {
8434 Assert(idxRegEffSp == idxRegRsp);
8435 Assert(cBitsFlat == 32 || cBitsFlat == 64);
8436 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8437 if (cBitsFlat == 64)
8438 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
8439 else
8440 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
8441 }
8442 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8443 {
8444 Assert(idxRegEffSp != idxRegRsp);
8445 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8446 kIemNativeGstRegUse_ReadOnly);
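        /* The D/B bit of the SS descriptor attributes decides whether the stack uses
           SP (16-bit) or ESP (32-bit); test it and branch to the matching 16-bit or
           32-bit stack pointer update sequence. */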
8447#ifdef RT_ARCH_AMD64
8448 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8449#else
8450 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8451#endif
8452 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8453 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8454 offFixupJumpToUseOtherBitSp = off;
8455 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8456 {
8457 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8458 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8459 }
8460 else
8461 {
8462 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8463 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8464 }
8465 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8466 }
8467 /* SpUpdateEnd: */
8468 uint32_t const offLabelSpUpdateEnd = off;
8469
8470 /*
8471 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8472 * we're skipping lookup).
8473 */
8474 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8475 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
8476 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8477 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8478 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8479 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8480 : UINT32_MAX;
8481 uint8_t const idxRegValue = !TlbState.fSkip
8482 && pVarValue->enmKind != kIemNativeVarKind_Immediate
8483 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
8484 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
8485 : UINT8_MAX;
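    /* IEMNATIVE_CALL_ARG2_GREG is preferred for the value register so that on the
       TlbMiss path below it is often already in the register the helper call expects. */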
8486 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
8487
8488
8489 if (!TlbState.fSkip)
8490 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8491 else
8492 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8493
8494 /*
8495 * Use16BitSp:
8496 */
8497 if (cBitsFlat == 0)
8498 {
8499#ifdef RT_ARCH_AMD64
8500 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8501#else
8502 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8503#endif
8504 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8505 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8506 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8507 else
8508 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8509 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8510 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8511 }
8512
8513 /*
8514 * TlbMiss:
8515 *
8516 * Call helper to do the pushing.
8517 */
8518 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8519
8520#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8521 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8522#else
8523 RT_NOREF(idxInstr);
8524#endif
8525
8526 /* Save variables in volatile registers. */
8527 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8528 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8529 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
8530 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
8531 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8532
8533 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
8534 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
8535 {
8536 /* Swap them using ARG0 as temp register: */
8537 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
8538 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
8539 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
8540 }
8541 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
8542 {
8543 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
8544 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
8545 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8546
8547 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
8548 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8549 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8550 }
8551 else
8552 {
8553 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
8554 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8555
8556 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
8557 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
8558 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
8559 }
8560
8561#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8562 /* Do delayed EFLAGS calculations. */
8563 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
8564 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
8565#endif
8566
8567 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8568 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8569
8570 /* Done setting up parameters, make the call. */
8571 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
8572
8573 /* Restore variables and guest shadow registers to volatile registers. */
8574 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8575 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8576
8577#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8578 if (!TlbState.fSkip)
8579 {
8580 /* end of TlbMiss - Jump to the done label. */
8581 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8582 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8583
8584 /*
8585 * TlbLookup:
8586 */
8587 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
8588 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8589
8590 /*
8591 * Emit code to do the actual storing / fetching.
8592 */
8593 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
8594# ifdef IEM_WITH_TLB_STATISTICS
8595 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8596 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8597# endif
8598 if (idxRegValue != UINT8_MAX)
8599 {
8600 switch (cbMemAccess)
8601 {
8602 case 2:
8603 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8604 break;
8605 case 4:
8606 if (!fIsIntelSeg)
8607 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8608 else
8609 {
8610 /* Intel real mode segment push: the 10890XE adds the 2nd half of EFLAGS to a
8611 PUSH FS in real mode, so we have to try to emulate that here.
8612 We borrow the now unused idxReg1 from the TLB lookup code here. */
8613 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
8614 kIemNativeGstReg_EFlags);
8615 if (idxRegEfl != UINT8_MAX)
8616 {
8617#ifdef RT_ARCH_AMD64
8618 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
8619 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8620 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8621#else
8622 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
8623 off, TlbState.idxReg1, idxRegEfl,
8624 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8625#endif
8626 iemNativeRegFreeTmp(pReNative, idxRegEfl);
8627 }
8628 else
8629 {
8630 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
8631 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
8632 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8633 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8634 }
8635 /* ASSUMES the upper half of idxRegValue is ZERO. */
8636 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
8637 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
8638 }
8639 break;
8640 case 8:
8641 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8642 break;
8643 default:
8644 AssertFailed();
8645 }
8646 }
8647 else
8648 {
8649 switch (cbMemAccess)
8650 {
8651 case 2:
8652 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
8653 idxRegMemResult, TlbState.idxReg1);
8654 break;
8655 case 4:
8656 Assert(!fIsSegReg);
8657 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
8658 idxRegMemResult, TlbState.idxReg1);
8659 break;
8660 case 8:
8661 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
8662 break;
8663 default:
8664 AssertFailed();
8665 }
8666 }
8667
8668 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8669 TlbState.freeRegsAndReleaseVars(pReNative);
8670
8671 /*
8672 * TlbDone:
8673 *
8674 * Commit the new RSP value.
8675 */
8676 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8677 }
8678#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8679
8680#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8681 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
8682#endif
8683 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8684 if (idxRegEffSp != idxRegRsp)
8685 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8686
8687 /* The value variable is implicitly flushed. */
8688 if (idxRegValue != UINT8_MAX)
8689 iemNativeVarRegisterRelease(pReNative, idxVarValue);
8690 iemNativeVarFreeLocal(pReNative, idxVarValue);
8691
8692 return off;
8693}
8694
8695
8696
8697/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
8698#define IEM_MC_POP_GREG_U16(a_iGReg) \
8699 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
8700 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
8701#define IEM_MC_POP_GREG_U32(a_iGReg) \
8702 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
8703 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
8704#define IEM_MC_POP_GREG_U64(a_iGReg) \
8705 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
8706 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
8707
8708#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
8709 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
8710 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8711#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
8712 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
8713 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
8714
8715#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
8716 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
8717 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8718#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
8719 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
8720 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
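/* Note: RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) packs the operand width into byte 0 and the
   flat-mode stack width into byte 1; iemNativeEmitStackPopGReg below recovers them with
   RT_BYTE1()/RT_BYTE2(), e.g. RT_MAKE_U32_FROM_U8(16, 64, 0, 0) gives cbMem = 16/8 = 2 and cBitsFlat = 64. */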
8721
8722
8723DECL_FORCE_INLINE_THROW(uint32_t)
8724iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
8725 uint8_t idxRegTmp)
8726{
8727 /* Use16BitSp: */
8728#ifdef RT_ARCH_AMD64
8729 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8730 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
8731 RT_NOREF(idxRegTmp);
8732#else
8733 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
8734 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
8735 /* add tmp, regrsp, #cbMem */
8736 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
8737 /* and tmp, tmp, #0xffff */
8738 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
8739 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
8740 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
8741 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
8742#endif
8743 return off;
8744}
8745
8746
8747DECL_FORCE_INLINE(uint32_t)
8748iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
8749{
8750 /* Use32BitSp: */
8751 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8752 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
8753 return off;
8754}
8755
8756
8757/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
8758DECL_INLINE_THROW(uint32_t)
8759iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
8760 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
8761{
8762 /*
8763 * Assert sanity.
8764 */
8765 Assert(idxGReg < 16);
8766#ifdef VBOX_STRICT
8767 if (RT_BYTE2(cBitsVarAndFlat) != 0)
8768 {
8769 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8770 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8771 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8772 Assert( pfnFunction
8773 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8774 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
8775 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8776 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
8777 : UINT64_C(0xc000b000a0009000) ));
8778 }
8779 else
8780 Assert( pfnFunction
8781 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
8782 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
8783 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
8784 : UINT64_C(0xc000b000a0009000) ));
8785#endif
8786
8787#ifdef VBOX_STRICT
8788 /*
8789 * Check that the fExec flags we've got make sense.
8790 */
8791 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8792#endif
8793
8794 /*
8795 * To keep things simple we have to commit any pending writes first as we
8796 * may end up making calls.
8797 */
8798 off = iemNativeRegFlushPendingWrites(pReNative, off);
8799
8800 /*
8801 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
8802 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
8803 * directly as the effective stack pointer.
8804 * (Code structure is very similar to that of PUSH)
8805 */
8806 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
8807 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
8808 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8809 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8810 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8811 /** @todo can do a better job picking the register here. For cbMem >= 4 this
8812 * will be the resulting register value. */
8813 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
8814
8815 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8816 if (cBitsFlat != 0)
8817 {
8818 Assert(idxRegEffSp == idxRegRsp);
8819 Assert(cBitsFlat == 32 || cBitsFlat == 64);
8820 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8821 }
8822 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8823 {
8824 Assert(idxRegEffSp != idxRegRsp);
8825 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8826 kIemNativeGstRegUse_ReadOnly);
8827#ifdef RT_ARCH_AMD64
8828 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8829#else
8830 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8831#endif
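    /* The X86DESCATTR_D bit of SS selects the stack width: set means a 32-bit stack (ESP), clear a 16-bit stack (SP). */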
8832 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8833 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8834 offFixupJumpToUseOtherBitSp = off;
8835 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8836 {
8837/** @todo can skip idxRegRsp updating when popping ESP. */
8838 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8839 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8840 }
8841 else
8842 {
8843 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8844 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8845 }
8846 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8847 }
8848 /* SpUpdateEnd: */
8849 uint32_t const offLabelSpUpdateEnd = off;
8850
8851 /*
8852 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8853 * we're skipping lookup).
8854 */
8855 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8856 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
8857 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8858 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8859 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8860 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8861 : UINT32_MAX;
8862
8863 if (!TlbState.fSkip)
8864 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8865 else
8866 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8867
8868 /*
8869 * Use16BitSp:
8870 */
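    /* Despite the label, this out-of-line block handles whichever stack width the straight-line path
       above did not emit: in 32-bit code it does the 16-bit update, in 16-bit code the 32-bit one
       (offFixupJumpToUseOtherBitSp points here). */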
8871 if (cBitsFlat == 0)
8872 {
8873#ifdef RT_ARCH_AMD64
8874 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8875#else
8876 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8877#endif
8878 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8879 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8880 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8881 else
8882 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8883 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8884 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8885 }
8886
8887 /*
8888 * TlbMiss:
8889 *
8890 * Call helper to do the popping.
8891 */
8892 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8893
8894#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8895 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8896#else
8897 RT_NOREF(idxInstr);
8898#endif
8899
8900 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8901 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8902 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
8903 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8904
8905
8906 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
8907 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8908 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8909
8910#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8911 /* Do delayed EFLAGS calculations. */
8912 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
8913#endif
8914
8915 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8916 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8917
8918 /* Done setting up parameters, make the call. */
8919 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
8920
8921 /* Move the return register content to idxRegMemResult. */
8922 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8923 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8924
8925 /* Restore variables and guest shadow registers to volatile registers. */
8926 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8927 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8928
8929#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8930 if (!TlbState.fSkip)
8931 {
8932 /* end of TlbMiss - Jump to the done label. */
8933 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8934 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8935
8936 /*
8937 * TlbLookup:
8938 */
8939 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
8940 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8941
8942 /*
8943 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult itself).
8944 */
8945 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8946# ifdef IEM_WITH_TLB_STATISTICS
8947 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8948 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8949# endif
8950 switch (cbMem)
8951 {
8952 case 2:
8953 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8954 break;
8955 case 4:
8956 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8957 break;
8958 case 8:
8959 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8960 break;
8961 default:
8962 AssertFailed();
8963 }
8964
8965 TlbState.freeRegsAndReleaseVars(pReNative);
8966
8967 /*
8968 * TlbDone:
8969 *
8970 * Set the new RSP value (FLAT accesses needs to calculate it first) and
8971 * commit the popped register value.
8972 */
8973 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8974 }
8975#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8976
8977 if (idxGReg != X86_GREG_xSP)
8978 {
8979 /* Set the register. */
8980 if (cbMem >= sizeof(uint32_t))
8981 {
8982#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8983 AssertMsg( pReNative->idxCurCall == 0
8984 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8985 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8986 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8987#endif
8988 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8989#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8990 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8991#endif
8992#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8993 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8994 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8995#endif
8996 }
8997 else
8998 {
8999 Assert(cbMem == sizeof(uint16_t));
9000 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
9001 kIemNativeGstRegUse_ForUpdate);
9002 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
9003#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
9004 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
9005#endif
9006 iemNativeRegFreeTmp(pReNative, idxRegDst);
9007 }
9008
9009 /* Complete RSP calculation for FLAT mode. */
9010 if (idxRegEffSp == idxRegRsp)
9011 {
9012 if (cBitsFlat == 64)
9013 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
9014 else
9015 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
9016 }
9017 }
9018 else
9019 {
9020 /* We're popping RSP, ESP or SP. Only the 16-bit SP case needs a bit of extra work, of course. */
9021 if (cbMem == sizeof(uint64_t))
9022 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
9023 else if (cbMem == sizeof(uint32_t))
9024 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
9025 else
9026 {
9027 if (idxRegEffSp == idxRegRsp)
9028 {
9029 if (cBitsFlat == 64)
9030 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
9031 else
9032 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
9033 }
9034 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
9035 }
9036 }
9037
9038#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
9039 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
9040#endif
9041
9042 iemNativeRegFreeTmp(pReNative, idxRegRsp);
9043 if (idxRegEffSp != idxRegRsp)
9044 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
9045 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
9046
9047 return off;
9048}
9049
9050
9051
9052/*********************************************************************************************************************************
9053* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
9054*********************************************************************************************************************************/
9055
9056#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9057 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
9058 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
9059 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
9060
9061#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9062 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
9063 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
9064 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
9065
9066#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9067 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
9068 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
9069 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
9070
9071#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9072 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
9073 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
9074 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
9075
9076
9077#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9078 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
9079 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9080 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
9081
9082#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9083 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
9084 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9085 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
9086
9087#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9088 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
9089 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9090 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
9091
9092#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9093 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
9094 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9095 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
9096
9097#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9098 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
9099 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9100 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
9101
9102
9103#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9104 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
9105 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9106 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
9107
9108#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9109 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
9110 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9111 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
9112
9113#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9114 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
9115 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9116 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
9117
9118#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9119 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
9120 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9121 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
9122
9123#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9124 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
9125 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9126 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
9127
9128
9129#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9130 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
9131 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9132 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
9133
9134#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9135 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
9136 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9137 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
9138#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9139 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
9140 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9141 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
9142
9143#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9144 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
9145 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9146 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
9147
9148#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9149 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
9150 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9151 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
9152
9153
9154#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9155 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
9156 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9157 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
9158
9159#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9160 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
9161 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
9162 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
9163
9164
9165#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9166 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9167 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9168 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
9169
9170#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9171 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9172 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9173 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
9174
9175#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9176 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9177 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9178 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
9179
9180#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9181 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9182 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9183 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
9184
9185
9186
9187#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9188 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9189 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
9190 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
9191
9192#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9193 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9194 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
9195 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
9196
9197#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9198 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9199 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
9200 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
9201
9202#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9203 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9204 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
9205 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
9206
9207
9208#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9209 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9210 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9211 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
9212
9213#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9214 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9215 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9216 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
9217
9218#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9219 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9220 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9221 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9222
9223#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9224 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9225 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9226 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
9227
9228#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
9229 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
9230 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9231 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9232
9233
9234#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9235 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9236 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9237 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
9238
9239#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9240 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9241 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9242 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
9243
9244#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9245 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9246 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9247 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9248
9249#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9250 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9251 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9252 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
9253
9254#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
9255 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
9256 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9257 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9258
9259
9260#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9261 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9262 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9263 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
9264
9265#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9266 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9267 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9268 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
9269
9270#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9271 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9272 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9273 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9274
9275#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9276 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9277 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9278 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
9279
9280#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
9281 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
9282 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9283 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9284
9285
9286#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
9287 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
9288 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9289 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
9290
9291#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
9292 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
9293 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
9294 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
9295
9296
9297#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9298 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9299 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9300 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
9301
9302#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9303 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9304 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9305 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
9306
9307#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9308 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9309 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9310 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
9311
9312#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9313 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9314 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9315 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
9316
9317
9318DECL_INLINE_THROW(uint32_t)
9319iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
9320 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint32_t fAlignMaskAndCtl,
9321 uintptr_t pfnFunction, uint8_t idxInstr)
9322{
9323 /*
9324 * Assert sanity.
9325 */
9326 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
9327 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
9328 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
9329 && pVarMem->cbVar == sizeof(void *),
9330 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9331
9332 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9333 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9334 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
9335 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
9336 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9337
9338 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
9339 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
9340 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
9341 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
9342 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9343
9344 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
9345
9346 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
9347
9348#ifdef VBOX_STRICT
9349# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
9350 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
9351 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
9352 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
9353 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
9354# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
9355 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
9356 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
9357 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
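/* E.g. IEM_MAP_HLP_FN(IEM_ACCESS_DATA_RW, iemNativeHlpMemMapDataU32) resolves to
   (uintptr_t)iemNativeHlpMemMapDataU32Rw; the asserts below use this to check that the
   caller passed the helper matching the requested access type. */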
9358
9359 if (iSegReg == UINT8_MAX)
9360 {
9361 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
9362 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
9363 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
9364 switch (cbMem)
9365 {
9366 case 1:
9367 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8));
9368 Assert(!fAlignMaskAndCtl);
9369 break;
9370 case 2:
9371 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16));
9372 Assert(fAlignMaskAndCtl < 2);
9373 break;
9374 case 4:
9375 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32));
9376 Assert(fAlignMaskAndCtl < 4);
9377 break;
9378 case 8:
9379 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64));
9380 Assert(fAlignMaskAndCtl < 8);
9381 break;
9382 case 10:
9383 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
9384 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
9385 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9386 Assert(fAlignMaskAndCtl < 8);
9387 break;
9388 case 16:
9389 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128));
9390 Assert(fAlignMaskAndCtl < 16);
9391 break;
9392# if 0
9393 case 32:
9394 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256));
9395 Assert(fAlignMaskAndCtl < 32);
9396 break;
9397 case 64:
9398 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512));
9399 Assert(fAlignMaskAndCtl < 64);
9400 break;
9401# endif
9402 default: AssertFailed(); break;
9403 }
9404 }
9405 else
9406 {
9407 Assert(iSegReg < 6);
9408 switch (cbMem)
9409 {
9410 case 1:
9411 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8));
9412 Assert(!fAlignMaskAndCtl);
9413 break;
9414 case 2:
9415 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16));
9416 Assert(fAlignMaskAndCtl < 2);
9417 break;
9418 case 4:
9419 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32));
9420 Assert(fAlignMaskAndCtl < 4);
9421 break;
9422 case 8:
9423 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64));
9424 Assert(fAlignMaskAndCtl < 8);
9425 break;
9426 case 10:
9427 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
9428 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
9429 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9430 Assert(fAlignMaskAndCtl < 8);
9431 break;
9432 case 16:
9433 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128));
9434 Assert(fAlignMaskAndCtl < 16);
9435 break;
9436# if 0
9437 case 32:
9438 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256));
9439 Assert(fAlignMaskAndCtl < 32);
9440 break;
9441 case 64:
9442 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512));
9443 Assert(fAlignMaskAndCtl < 64);
9444 break;
9445# endif
9446 default: AssertFailed(); break;
9447 }
9448 }
9449# undef IEM_MAP_HLP_FN
9450# undef IEM_MAP_HLP_FN_NO_AT
9451#endif
9452
9453#ifdef VBOX_STRICT
9454 /*
9455 * Check that the fExec flags we've got make sense.
9456 */
9457 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
9458#endif
9459
9460 /*
9461 * To keep things simple we have to commit any pending writes first as we
9462 * may end up making calls.
9463 */
9464 off = iemNativeRegFlushPendingWrites(pReNative, off);
9465
9466#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9467 /*
9468 * Move/spill/flush stuff out of call-volatile registers.
9469 * This is the easy way out. We could contain this to the tlb-miss branch
9470 * by saving and restoring active stuff here.
9471 */
9472 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
9473 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9474#endif
9475
9476 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
9477 while the tlb-miss codepath will temporarily put it on the stack.
9478 Set the type to stack here so we don't need to do it twice below. */
9479 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
9480 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
9481 /** @todo use a tmp register from TlbState, since they'll be free after tlb
9482 * lookup is done. */
9483
9484 /*
9485 * Define labels and allocate the result register (trying for the return
9486 * register if we can).
9487 */
9488 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
9489 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
9490 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
9491 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
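    /* Preferring IEMNATIVE_CALL_RET_GREG here saves the extra reg-to-reg move after the TlbMiss
       helper call below (the idxRegMemResult != IEMNATIVE_CALL_RET_GREG case). */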
9492 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
9493 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
9494 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
9495 : UINT32_MAX;
9496
9497 /*
9498 * Jump to the TLB lookup code.
9499 */
9500 if (!TlbState.fSkip)
9501 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
9502
9503 /*
9504 * TlbMiss:
9505 *
9506 * Call helper to do the mapping.
9507 * We flush all guest register shadow copies here.
9508 */
9509 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
9510
9511#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
9512 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9513#else
9514 RT_NOREF(idxInstr);
9515#endif
9516
9517#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9518 /* Save variables in volatile registers. */
9519 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
9520 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
9521#endif
9522
9523 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
9524 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
9525#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9526 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
9527#else
9528 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9529#endif
9530
9531 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
9532 if (iSegReg != UINT8_MAX)
9533 {
9534 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
9535 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
9536 }
9537
9538#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
9539 /* Do delayed EFLAGS calculations. */
9540 if (iSegReg == UINT8_MAX)
9541 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
9542 fHstRegsNotToSave);
9543 else
9544 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
9545 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG)>(pReNative, off, &TlbState,
9546 fHstRegsNotToSave);
9547#endif
9548
9549 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
9550 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
9551 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
9552
9553 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9554 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9555
9556 /* Done setting up parameters, make the call. */
9557 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
9558
9559 /*
9560 * Put the output in the right registers.
9561 */
9562 Assert(idxRegMemResult == pVarMem->idxReg);
9563 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
9564 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
9565
9566#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9567 /* Restore variables and guest shadow registers to volatile registers. */
9568 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
9569 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
9570#endif
9571
9572 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
9573 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
9574
9575#ifdef IEMNATIVE_WITH_TLB_LOOKUP
9576 if (!TlbState.fSkip)
9577 {
9578 /* end of TlbMiss - Jump to the done label. */
9579 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
9580 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
9581
9582 /*
9583 * TlbLookup:
9584 */
9585 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl, fAccess,
9586 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
9587# ifdef IEM_WITH_TLB_STATISTICS
9588 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
9589 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
9590# endif
9591
9592 /* [idxVarUnmapInfo] = 0; */
9593 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
9594
9595 /*
9596 * TlbDone:
9597 */
9598 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
9599
9600 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
9601
9602# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9603 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
9604 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9605# endif
9606 }
9607#else
9608 RT_NOREF(fAccess, fAlignMaskAndCtl, idxLabelTlbMiss);
9609#endif
9610
9611 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9612 iemNativeVarRegisterRelease(pReNative, idxVarMem);
9613
9614 return off;
9615}
9616
9617
9618#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
9619 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
9620 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
9621
9622#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
9623 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
9624 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
9625
9626#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
9627 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
9628 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
9629
9630#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
9631 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
9632 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
9633
9634DECL_INLINE_THROW(uint32_t)
9635iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
9636 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
9637{
9638 /*
9639 * Assert sanity.
9640 */
9641 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9642#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
9643 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9644#endif
9645 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
9646 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
9647 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
9648#ifdef VBOX_STRICT
9649 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
9650 {
9651 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
9652 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
9653 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
9654 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
9655 case IEM_ACCESS_TYPE_WRITE:
9656 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
9657 case IEM_ACCESS_TYPE_READ:
9658 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
9659 default: AssertFailed();
9660 }
9661#else
9662 RT_NOREF(fAccess);
9663#endif
9664
9665 /*
9666 * To keep things simple we have to commit any pending writes first as we
9667 * may end up making calls (there shouldn't be any at this point, so this
9668 * is just for consistency).
9669 */
9670 /** @todo we could postpone this till we make the call and reload the
9671 * registers after returning from the call. Not sure if that's sensible or
9672 * not, though. */
9673 off = iemNativeRegFlushPendingWrites(pReNative, off);
9674
9675 /*
9676 * Move/spill/flush stuff out of call-volatile registers.
9677 *
9678 * We exclude any register holding the bUnmapInfo variable, as we'll be
9679 * checking it after returning from the call and will free it afterwards.
9680 */
9681 /** @todo save+restore active registers and maybe guest shadows in miss
9682 * scenario. */
9683 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
9684 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
9685
9686 /*
9687 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
9688 * to call the unmap helper function.
9689 *
9690 * The likelihood of it being zero is higher than for the TLB hit when doing
9691 * the mapping, as a TLB miss for a well aligned and unproblematic memory
9692 * access should also end up with a mapping that won't need special unmapping.
9693 */
9694 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
9695 * should speed up things for the pure interpreter as well when TLBs
9696 * are enabled. */
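    /* Rough shape of what gets emitted below (AMD64 stack-slot flavour):
     *     test    byte [rbp - bUnmapInfo], 0ffh
     *     jz      .nothing_to_unmap
     *     mov     arg1, bUnmapInfo ; arg0 = pVCpu
     *     call    iemNativeHlpMemCommitAndUnmapXxx
     *  .nothing_to_unmap:
     * The label name is only illustrative; the branch is a fixed jz patched up at the end. */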
9697#ifdef RT_ARCH_AMD64
9698 if (pVarUnmapInfo->idxReg == UINT8_MAX)
9699 {
9700 /* test byte [rbp - xxx], 0ffh */
9701 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9702 pbCodeBuf[off++] = 0xf6;
9703 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
9704 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9705 pbCodeBuf[off++] = 0xff;
9706 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9707 }
9708 else
9709#endif
9710 {
9711 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
9712 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
9713 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
9714 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9715 }
9716 uint32_t const offJmpFixup = off;
9717 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
9718
9719 /*
9720 * Call the unmap helper function.
9721 */
9722#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
9723 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9724#else
9725 RT_NOREF(idxInstr);
9726#endif
9727
9728 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
9729 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
9730 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9731
9732 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9733 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9734
9735 /* Done setting up parameters, make the call.
9736 Note! Since we can only end up here if we took a TLB miss, any postponed EFLAGS
9737 calculations have been done there already. Thus, a_fSkipEflChecks = true. */
9738 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
9739
9740 /* The bUnmapInfo variable is implicitly freed by these MCs. */
9741 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
9742
9743 /*
9744 * Done, just fixup the jump for the non-call case.
9745 */
9746 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
9747
9748 return off;
9749}
9750
9751
9752
9753/*********************************************************************************************************************************
9754* State and Exceptions *
9755*********************************************************************************************************************************/
9756
9757#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9758#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9759
9760#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9761#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9762#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9763
9764#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9765#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9766#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9767
9768
9769DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
9770{
9771#ifndef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
9772 RT_NOREF(pReNative, fForChange);
9773#else
9774 if ( !(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED)
9775 && fForChange)
9776 {
9777# ifdef RT_ARCH_AMD64
9778
9779 /* Need to save the host MXCSR the first time, and clear the exception flags. */
9780 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9781 {
9782 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9783
9784 /* stmxcsr */
9785 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9786 pbCodeBuf[off++] = X86_OP_REX_B;
9787 pbCodeBuf[off++] = 0x0f;
9788 pbCodeBuf[off++] = 0xae;
9789 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9790 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9791 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9792 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9793 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9794 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9795
9796 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9797 }
9798
9799 uint8_t const idxRegTmp = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
9800 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
9801 kIemNativeGstRegUse_ReadOnly);
9802
9803 /*
9804 * Mask all exceptions, clear the exception status and load the result into
9805 * MXCSR, taking a detour through memory because ldmxcsr/stmxcsr don't support
9806 * a register source/target (sigh).
9807 */
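 /* Rough illustration only: the value loaded into the host MXCSR below is
  *     uRegMxcsrTmp = (guest MXCSR | X86_MXCSR_XCPT_MASK) & ~X86_MXCSR_XCPT_FLAGS;
  * i.e. all exceptions masked and any pending exception flags cleared. */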
9808 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr);
9809 off = iemNativeEmitOrGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, X86_MXCSR_XCPT_MASK);
9810 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, ~X86_MXCSR_XCPT_FLAGS);
9811 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9812
9813 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9814
9815 /* ldmxcsr */
9816 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9817 pbCodeBuf[off++] = X86_OP_REX_B;
9818 pbCodeBuf[off++] = 0x0f;
9819 pbCodeBuf[off++] = 0xae;
9820 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9821 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9822 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9823 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9824 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9825 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9826
9827 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9828 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9829
9830# elif defined(RT_ARCH_ARM64)
9831 uint8_t const idxRegTmp = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
9832
9833 /* Need to save the host floating point control register the first time, and clear FPSR. */
9834 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9835 {
9836 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9837 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);
9838 pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9839 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9840 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9841 }
9842
9843 /*
9844 * Translate MXCSR to FPCR.
9845 *
9846 * Unfortunately we can't emulate the exact behavior of MXCSR as we can't take
9847 * FEAT_AFP on arm64 for granted (my M2 MacBook doesn't have it). So we can't map
9848 * MXCSR.DAZ to FPCR.FIZ and MXCSR.FZ to FPCR.FZ with FPCR.AH being set.
9849 * We can only use FPCR.FZ which will flush input _and_ output de-normals to zero.
9850 */
9851 /** @todo Check the host supported flags (needs additional work to get the host features from CPUM)
9852 * and implement alternate handling if FEAT_AFP is present. */
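 /* Rough illustration only: given the FEAT_AFP limitation above, the mapping
  * emitted below boils down to
  *     FPCR.FZ = MXCSR.FZ | MXCSR.DAZ;
  * so output de-normals are also flushed when the guest only set DAZ (and
  * input de-normals when it only set FZ). */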
9853 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
9854 kIemNativeGstRegUse_ReadOnly);
9855
9856 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
9857
9858 /* First make sure that there is nothing set for the upper 16-bits (X86_MXCSR_MM, which we don't emulate right now). */
9859 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegTmp, idxRegMxCsr);
9860
9861 /* If either MXCSR.FZ or MXCSR.DAZ is set FPCR.FZ will be set. */
9862 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(IEMNATIVE_REG_FIXED_TMP0, idxRegTmp, X86_MXCSR_DAZ_BIT, 1);
9863 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, X86_MXCSR_FZ_BIT);
9864 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(idxRegTmp, idxRegTmp, IEMNATIVE_REG_FIXED_TMP0);
9865 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegTmp, idxRegTmp, ARMV8_FPCR_FZ_BIT);
9866
9867 /*
9868 * Init the rounding mode, the layout differs between MXCSR.RM[14:13] and FPCR.RMode[23:22]:
9869 *
9870 * Value MXCSR FPCR
9871 * 0 RN RN
9872 * 1 R- R+
9873 * 2 R+ R-
9874 * 3 RZ RZ
9875 *
9876 * Conversion can be achieved by switching bit positions
9877 */
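 /* Rough illustration only: swapping the two bits converts the encodings from
  * the table above, e.g. MXCSR.RC = 01b (round towards -infinity) becomes
  * FPCR.RMode = 10b (also round towards -infinity); RN (00b) and RZ (11b) map
  * onto themselves. */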
9878 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT);
9879 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 14, 1);
9880 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT + 1);
9881 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 13, 1);
9882
9883 /* Write the value to FPCR. */
9884 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9885
9886 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9887 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9888 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9889# else
9890# error "Port me"
9891# endif
9892 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
9893 }
9894#endif
9895 return off;
9896}
9897
9898
9899
9900/*********************************************************************************************************************************
9901* Emitters for FPU related operations. *
9902*********************************************************************************************************************************/
9903
9904#define IEM_MC_FETCH_FCW(a_u16Fcw) \
9905 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
9906
9907/** Emits code for IEM_MC_FETCH_FCW. */
9908DECL_INLINE_THROW(uint32_t)
9909iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9910{
9911 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9912 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9913
9914 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9915
9916 /* Allocate a temporary FCW register. */
9917 /** @todo eliminate extra register */
9918 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
9919 kIemNativeGstRegUse_ReadOnly);
9920
9921 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
9922
9923 /* Free but don't flush the FCW register. */
9924 iemNativeRegFreeTmp(pReNative, idxFcwReg);
9925 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9926
9927 return off;
9928}
9929
9930
9931#define IEM_MC_FETCH_FSW(a_u16Fsw) \
9932 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
9933
9934/** Emits code for IEM_MC_FETCH_FSW. */
9935DECL_INLINE_THROW(uint32_t)
9936iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9937{
9938 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9939 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9940
9941 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
9942 /* Allocate a temporary FSW register. */
9943 /** @todo eliminate extra register */
9944 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
9945 kIemNativeGstRegUse_ReadOnly);
9946
9947 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
9948
9949 /* Free but don't flush the FSW register. */
9950 iemNativeRegFreeTmp(pReNative, idxFswReg);
9951 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9952
9953 return off;
9954}
9955
9956
9957
9958#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9959
9960
9961/*********************************************************************************************************************************
9962* Emitters for SSE/AVX specific operations. *
9963*********************************************************************************************************************************/
9964
9965#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
9966 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
9967
9968/** Emits code for IEM_MC_COPY_XREG_U128. */
9969DECL_INLINE_THROW(uint32_t)
9970iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
9971{
9972 /* This is a nop if the source and destination register are the same. */
9973 if (iXRegDst != iXRegSrc)
9974 {
9975 /* Allocate destination and source register. */
9976 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
9977 kIemNativeGstSimdRegLdStSz_Low128,
9978 kIemNativeGstRegUse_ForFullWrite);
9979 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
9980 kIemNativeGstSimdRegLdStSz_Low128,
9981 kIemNativeGstRegUse_ReadOnly);
9982
9983 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9984
9985 /* Free but don't flush the source and destination register. */
9986 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9987 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9988 }
9989
9990 return off;
9991}
9992
9993
9994#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
9995 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
9996
9997/** Emits code for IEM_MC_FETCH_XREG_U128. */
9998DECL_INLINE_THROW(uint32_t)
9999iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
10000{
10001 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10002 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
10003
10004 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10005 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
10006
10007 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10008
10009 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10010
10011 /* Free but don't flush the source register. */
10012 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10013 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10014
10015 return off;
10016}
10017
10018
10019#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
10020 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
10021
10022#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
10023 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
10024
10025/** Emits code for IEM_MC_FETCH_XREG_U64. */
10026DECL_INLINE_THROW(uint32_t)
10027iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
10028{
10029 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10030 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10031
10032 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10033 kIemNativeGstSimdRegLdStSz_Low128,
10034 kIemNativeGstRegUse_ReadOnly);
10035
10036 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10037 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10038
10039 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
10040
10041 /* Free but don't flush the source register. */
10042 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10043 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10044
10045 return off;
10046}
10047
10048
10049#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
10050 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
10051
10052#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
10053 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
10054
10055/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
10056DECL_INLINE_THROW(uint32_t)
10057iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
10058{
10059 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10060 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10061
10062 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10063 kIemNativeGstSimdRegLdStSz_Low128,
10064 kIemNativeGstRegUse_ReadOnly);
10065
10066 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10067 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10068
10069 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
10070
10071 /* Free but don't flush the source register. */
10072 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10073 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10074
10075 return off;
10076}
10077
10078
10079#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
10080 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
10081
10082/** Emits code for IEM_MC_FETCH_XREG_U16. */
10083DECL_INLINE_THROW(uint32_t)
10084iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
10085{
10086 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10087 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
10088
10089 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10090 kIemNativeGstSimdRegLdStSz_Low128,
10091 kIemNativeGstRegUse_ReadOnly);
10092
10093 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10094 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10095
10096 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
10097
10098 /* Free but don't flush the source register. */
10099 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10100 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10101
10102 return off;
10103}
10104
10105
10106#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
10107 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
10108
10109/** Emits code for IEM_MC_FETCH_XREG_U8. */
10110DECL_INLINE_THROW(uint32_t)
10111iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
10112{
10113 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10114 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
10115
10116 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10117 kIemNativeGstSimdRegLdStSz_Low128,
10118 kIemNativeGstRegUse_ReadOnly);
10119
10120 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10121 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10122
10123 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
10124
10125 /* Free but don't flush the source register. */
10126 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10127 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10128
10129 return off;
10130}
10131
10132
10133#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
10134 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
10135
10136AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
10137#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
10138 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
10139
10140
10141/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
10142DECL_INLINE_THROW(uint32_t)
10143iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10144{
10145 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10146 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10147
10148 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10149 kIemNativeGstSimdRegLdStSz_Low128,
10150 kIemNativeGstRegUse_ForFullWrite);
10151 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10152
10153 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10154
10155 /* Free but don't flush the source register. */
10156 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10157 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10158
10159 return off;
10160}
10161
10162
10163#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
10164 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
10165
10166#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
10167 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
10168
10169#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
10170 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
10171
10172#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
10173 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
10174
10175#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
10176 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
10177
10178#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
10179 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
10180
10181/** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8 and the R32/R64 variants. */
10182DECL_INLINE_THROW(uint32_t)
10183iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
10184 uint8_t cbLocal, uint8_t iElem)
10185{
10186 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10187 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
10188
10189#ifdef VBOX_STRICT
10190 switch (cbLocal)
10191 {
10192 case sizeof(uint64_t): Assert(iElem < 2); break;
10193 case sizeof(uint32_t): Assert(iElem < 4); break;
10194 case sizeof(uint16_t): Assert(iElem < 8); break;
10195 case sizeof(uint8_t): Assert(iElem < 16); break;
10196 default: AssertFailed();
10197 }
10198#endif
10199
10200 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10201 kIemNativeGstSimdRegLdStSz_Low128,
10202 kIemNativeGstRegUse_ForUpdate);
10203 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10204
10205 switch (cbLocal)
10206 {
10207 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10208 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10209 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10210 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10211 default: AssertFailed();
10212 }
10213
10214 /* Free but don't flush the source register. */
10215 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10216 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10217
10218 return off;
10219}
10220
10221
10222#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
10223 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
10224
10225/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
10226DECL_INLINE_THROW(uint32_t)
10227iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10228{
10229 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10230 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10231
10232 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10233 kIemNativeGstSimdRegLdStSz_Low128,
10234 kIemNativeGstRegUse_ForUpdate);
10235 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10236
10237 /* Zero the vector register first, then store the 64-bit value to the lower 64 bits. */
10238 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10239 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10240
10241 /* Free but don't flush the source register. */
10242 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10243 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10244
10245 return off;
10246}
10247
10248
10249#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
10250 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
10251
10252/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
10253DECL_INLINE_THROW(uint32_t)
10254iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10255{
10256 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10257 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10258
10259 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10260 kIemNativeGstSimdRegLdStSz_Low128,
10261 kIemNativeGstRegUse_ForUpdate);
10262 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10263
10264 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
10265 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10266 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10267
10268 /* Free but don't flush the source register. */
10269 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10270 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10271
10272 return off;
10273}
10274
10275
10276#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
10277 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
10278
10279/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
10280DECL_INLINE_THROW(uint32_t)
10281iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
10282 uint8_t idxSrcVar, uint8_t iDwSrc)
10283{
10284 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10285 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10286
10287 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10288 kIemNativeGstSimdRegLdStSz_Low128,
10289 kIemNativeGstRegUse_ForUpdate);
10290 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10291
10292 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
10293 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
10294
10295 /* Free but don't flush the destination register. */
10296 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10297 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10298
10299 return off;
10300}
10301
10302
10303#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10304 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10305
10306/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
10307DECL_INLINE_THROW(uint32_t)
10308iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10309{
10310 /*
10311 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10312 * if iYRegDst gets allocated first for the full write it won't load the
10313 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10314 * duplicated from the already allocated host register for iYRegDst containing
10315 * garbage. This will be caught by the guest register value checking in debug
10316 * builds.
10317 */
10318 if (iYRegDst != iYRegSrc)
10319 {
10320 /* Allocate destination and source register. */
10321 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10322 kIemNativeGstSimdRegLdStSz_256,
10323 kIemNativeGstRegUse_ForFullWrite);
10324 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10325 kIemNativeGstSimdRegLdStSz_Low128,
10326 kIemNativeGstRegUse_ReadOnly);
10327
10328 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10329 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10330
10331 /* Free but don't flush the source and destination register. */
10332 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10333 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10334 }
10335 else
10336 {
10337 /* This effectively only clears the upper 128-bits of the register. */
10338 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10339 kIemNativeGstSimdRegLdStSz_High128,
10340 kIemNativeGstRegUse_ForFullWrite);
10341
10342 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10343
10344 /* Free but don't flush the destination register. */
10345 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10346 }
10347
10348 return off;
10349}
10350
10351
10352#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10353 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10354
10355/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
10356DECL_INLINE_THROW(uint32_t)
10357iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10358{
10359 /*
10360 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10361 * if iYRegDst gets allocated first for the full write it won't load the
10362 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10363 * duplicated from the already allocated host register for iYRegDst containing
10364 * garbage. This will be caught by the guest register value checking in debug
10365 * builds. The iYRegSrc == iYRegDst case would effectively only clear the upper
10366 * 256 bits of a ZMM register, which we don't support yet, so this is just a nop.
10367 */
10368 if (iYRegDst != iYRegSrc)
10369 {
10370 /* Allocate destination and source register. */
10371 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10372 kIemNativeGstSimdRegLdStSz_256,
10373 kIemNativeGstRegUse_ReadOnly);
10374 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10375 kIemNativeGstSimdRegLdStSz_256,
10376 kIemNativeGstRegUse_ForFullWrite);
10377
10378 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10379
10380 /* Free but don't flush the source and destination register. */
10381 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10382 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10383 }
10384
10385 return off;
10386}
10387
10388
10389#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
10390 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
10391
10392/** Emits code for IEM_MC_FETCH_YREG_U128. */
10393DECL_INLINE_THROW(uint32_t)
10394iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
10395{
10396 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10397 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
10398
10399 Assert(iDQWord <= 1);
10400 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10401 iDQWord == 1
10402 ? kIemNativeGstSimdRegLdStSz_High128
10403 : kIemNativeGstSimdRegLdStSz_Low128,
10404 kIemNativeGstRegUse_ReadOnly);
10405
10406 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10407 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10408
10409 if (iDQWord == 1)
10410 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10411 else
10412 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10413
10414 /* Free but don't flush the source register. */
10415 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10416 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10417
10418 return off;
10419}
10420
10421
10422#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
10423 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
10424
10425/** Emits code for IEM_MC_FETCH_YREG_U64. */
10426DECL_INLINE_THROW(uint32_t)
10427iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
10428{
10429 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10430 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10431
10432 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10433 iQWord >= 2
10434 ? kIemNativeGstSimdRegLdStSz_High128
10435 : kIemNativeGstSimdRegLdStSz_Low128,
10436 kIemNativeGstRegUse_ReadOnly);
10437
10438 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10439 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10440
10441 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
10442
10443 /* Free but don't flush the source register. */
10444 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10445 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10446
10447 return off;
10448}
10449
10450
10451#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
10452 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
10453
10454/** Emits code for IEM_MC_FETCH_YREG_U32. */
10455DECL_INLINE_THROW(uint32_t)
10456iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
10457{
10458 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10459 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10460
10461 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10462 iDWord >= 4
10463 ? kIemNativeGstSimdRegLdStSz_High128
10464 : kIemNativeGstSimdRegLdStSz_Low128,
10465 kIemNativeGstRegUse_ReadOnly);
10466
10467 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10468 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10469
10470 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
10471
10472 /* Free but don't flush the source register. */
10473 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10474 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10475
10476 return off;
10477}
10478
10479
10480#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
10481 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
10482
10483/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
10484DECL_INLINE_THROW(uint32_t)
10485iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10486{
10487 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10488 kIemNativeGstSimdRegLdStSz_High128,
10489 kIemNativeGstRegUse_ForFullWrite);
10490
10491 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10492
10493 /* Free but don't flush the register. */
10494 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10495
10496 return off;
10497}
10498
10499
10500#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
10501 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
10502
10503/** Emits code for IEM_MC_STORE_YREG_U128. */
10504DECL_INLINE_THROW(uint32_t)
10505iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
10506{
10507 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10508 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10509
10510 Assert(iDQword <= 1);
10511 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10512 iDQword == 0
10513 ? kIemNativeGstSimdRegLdStSz_Low128
10514 : kIemNativeGstSimdRegLdStSz_High128,
10515 kIemNativeGstRegUse_ForFullWrite);
10516
10517 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10518
10519 if (iDQword == 0)
10520 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10521 else
10522 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
10523
10524 /* Free but don't flush the source register. */
10525 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10526 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10527
10528 return off;
10529}
10530
10531
10532#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10533 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10534
10535/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
10536DECL_INLINE_THROW(uint32_t)
10537iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10538{
10539 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10540 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10541
10542 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10543 kIemNativeGstSimdRegLdStSz_256,
10544 kIemNativeGstRegUse_ForFullWrite);
10545
10546 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10547
10548 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10549 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10550
10551 /* Free but don't flush the source register. */
10552 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10553 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10554
10555 return off;
10556}
10557
10558
10559#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
10560 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
10561
10562/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
10563DECL_INLINE_THROW(uint32_t)
10564iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10565{
10566 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10567 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10568
10569 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10570 kIemNativeGstSimdRegLdStSz_256,
10571 kIemNativeGstRegUse_ForFullWrite);
10572
10573 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10574
10575 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10576 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10577
10578 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10579 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10580
10581 return off;
10582}
10583
10584
10585#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
10586 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
10587
10588/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
10589DECL_INLINE_THROW(uint32_t)
10590iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10591{
10592 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10593 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10594
10595 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10596 kIemNativeGstSimdRegLdStSz_256,
10597 kIemNativeGstRegUse_ForFullWrite);
10598
10599 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10600
10601 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10602 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10603
10604 /* Free but don't flush the source register. */
10605 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10606 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10607
10608 return off;
10609}
10610
10611
10612#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
10613 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
10614
10615/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
10616DECL_INLINE_THROW(uint32_t)
10617iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10618{
10619 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10620 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10621
10622 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10623 kIemNativeGstSimdRegLdStSz_256,
10624 kIemNativeGstRegUse_ForFullWrite);
10625
10626 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10627
10628 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10629 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10630
10631 /* Free but don't flush the source register. */
10632 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10633 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10634
10635 return off;
10636}
10637
10638
10639#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
10640 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
10641
10642/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
10643DECL_INLINE_THROW(uint32_t)
10644iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10645{
10646 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10647 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10648
10649 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10650 kIemNativeGstSimdRegLdStSz_256,
10651 kIemNativeGstRegUse_ForFullWrite);
10652
10653 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10654
10655 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10656 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10657
10658 /* Free but don't flush the source register. */
10659 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10660 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10661
10662 return off;
10663}
10664
10665
10666#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
10667 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
10668
10669/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
10670DECL_INLINE_THROW(uint32_t)
10671iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10672{
10673 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10674 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10675
10676 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10677 kIemNativeGstSimdRegLdStSz_256,
10678 kIemNativeGstRegUse_ForFullWrite);
10679
10680 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10681
10682 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10683
10684 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10685 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10686
10687 return off;
10688}
10689
10690
10691#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
10692 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
10693
10694/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
10695DECL_INLINE_THROW(uint32_t)
10696iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10697{
10698 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10699 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10700
10701 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10702 kIemNativeGstSimdRegLdStSz_256,
10703 kIemNativeGstRegUse_ForFullWrite);
10704
10705 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10706
10707 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10708
10709 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10710 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10711
10712 return off;
10713}
10714
10715
10716#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10717 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10718
10719/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
10720DECL_INLINE_THROW(uint32_t)
10721iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10722{
10723 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10724 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10725
10726 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10727 kIemNativeGstSimdRegLdStSz_256,
10728 kIemNativeGstRegUse_ForFullWrite);
10729
10730 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10731
10732 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10733
10734 /* Free but don't flush the source register. */
10735 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10736 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10737
10738 return off;
10739}
10740
10741
10742#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10743 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10744
10745/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
10746DECL_INLINE_THROW(uint32_t)
10747iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10748{
10749 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10750 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10751
10752 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10753 kIemNativeGstSimdRegLdStSz_256,
10754 kIemNativeGstRegUse_ForFullWrite);
10755
10756 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10757
10758 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10759
10760 /* Free but don't flush the source register. */
10761 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10762 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10763
10764 return off;
10765}
10766
10767
10768#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10769 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10770
10771/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
10772DECL_INLINE_THROW(uint32_t)
10773iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10774{
10775 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10776 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10777
10778 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10779 kIemNativeGstSimdRegLdStSz_256,
10780 kIemNativeGstRegUse_ForFullWrite);
10781
10782 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10783
10784 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
10785
10786 /* Free but don't flush the source register. */
10787 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10788 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10789
10790 return off;
10791}
10792
10793
10794#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10795 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10796
10797/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
10798DECL_INLINE_THROW(uint32_t)
10799iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10800{
10801 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10802 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10803
10804 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10805 kIemNativeGstSimdRegLdStSz_256,
10806 kIemNativeGstRegUse_ForFullWrite);
10807
10808 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10809
10810 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10811 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
10812
10813 /* Free but don't flush the source register. */
10814 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10815 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10816
10817 return off;
10818}
10819
10820
10821#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10822 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10823
10824/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
10825DECL_INLINE_THROW(uint32_t)
10826iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10827{
10828 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10829 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10830
10831 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10832 kIemNativeGstSimdRegLdStSz_256,
10833 kIemNativeGstRegUse_ForFullWrite);
10834
10835 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10836
10837 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10838 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10839
10840 /* Free but don't flush the source register. */
10841 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10842 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10843
10844 return off;
10845}
10846
10847
10848#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
10849 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
10850
10851/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
10852DECL_INLINE_THROW(uint32_t)
10853iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
10854{
10855 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10856 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10857
10858 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10859 kIemNativeGstSimdRegLdStSz_256,
10860 kIemNativeGstRegUse_ForFullWrite);
10861 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10862 kIemNativeGstSimdRegLdStSz_Low128,
10863 kIemNativeGstRegUse_ReadOnly);
10864 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10865
10866 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10867 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10868 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10869
10870 /* Free but don't flush the source and destination registers. */
10871 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10872 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10873 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10874
10875 return off;
10876}
10877
10878
10879#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
10880 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
10881
10882/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
10883DECL_INLINE_THROW(uint32_t)
10884iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
10885{
10886 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10887 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10888
10889 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10890 kIemNativeGstSimdRegLdStSz_256,
10891 kIemNativeGstRegUse_ForFullWrite);
10892 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10893 kIemNativeGstSimdRegLdStSz_Low128,
10894 kIemNativeGstRegUse_ReadOnly);
10895 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10896
10897 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10898 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
10899 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10900
10901 /* Free but don't flush the source and destination registers. */
10902 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10903 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10904 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10905
10906 return off;
10907}
10908
10909
10910#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
10911 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
10912
10913
10914/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
10915DECL_INLINE_THROW(uint32_t)
10916iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
10917{
10918 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10919 kIemNativeGstSimdRegLdStSz_Low128,
10920 kIemNativeGstRegUse_ForUpdate);
10921
10922 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
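 /* Rough illustration only: e.g. a_bMask = 0x3 zeroes dwords 0 and 1 of the
  * XMM register while leaving dwords 2 and 3 untouched. */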
10923 if (bImm8Mask & RT_BIT(0))
10924 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
10925 if (bImm8Mask & RT_BIT(1))
10926 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
10927 if (bImm8Mask & RT_BIT(2))
10928 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
10929 if (bImm8Mask & RT_BIT(3))
10930 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
10931
10932 /* Free but don't flush the destination register. */
10933 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10934
10935 return off;
10936}
10937
10938
10939#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
10940 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
10941
10942#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
10943 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
10944
10945/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
10946DECL_INLINE_THROW(uint32_t)
10947iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
10948{
10949 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10950 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
10951
10952 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10953 kIemNativeGstSimdRegLdStSz_256,
10954 kIemNativeGstRegUse_ReadOnly);
10955 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10956
10957 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
10958
10959 /* Free but don't flush the source register. */
10960 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10961 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10962
10963 return off;
10964}
10965
10966
10967#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
10968 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
10969
10970#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
10971 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
10972
10973/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
10974DECL_INLINE_THROW(uint32_t)
10975iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
10976{
10977 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10978 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10979
10980 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10981 kIemNativeGstSimdRegLdStSz_256,
10982 kIemNativeGstRegUse_ForFullWrite);
10983 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10984
10985 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
10986
10987 /* Free but don't flush the destination register. */
10988 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10989 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10990
10991 return off;
10992}
10993
10994
10995#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
10996 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
10997
10998
10999/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
11000DECL_INLINE_THROW(uint32_t)
11001iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
11002 uint8_t idxSrcVar, uint8_t iDwSrc)
11003{
11004 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
11005 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
11006
11007 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
11008 iDwDst < 4
11009 ? kIemNativeGstSimdRegLdStSz_Low128
11010 : kIemNativeGstSimdRegLdStSz_High128,
11011 kIemNativeGstRegUse_ForUpdate);
11012 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
11013 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
11014
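    /* Copy the selected dword from the source variable into a temporary GPR and from there
       into the destination dword; only the 128-bit half containing iDwDst was loaded and
       marked for update above. */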
11015 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
11016 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
11017
11018 /* Free but don't flush the destination register. */
11019 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
11020 iemNativeRegFreeTmp(pReNative, idxRegTmp);
11021 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
11022
11023 return off;
11024}
11025
11026
11027#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
11028 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
11029
11030
11031/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
11032DECL_INLINE_THROW(uint32_t)
11033iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
11034 uint8_t idxSrcVar, uint8_t iQwSrc)
11035{
11036 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
11037 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
11038
11039 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
11040 iQwDst < 2
11041 ? kIemNativeGstSimdRegLdStSz_Low128
11042 : kIemNativeGstSimdRegLdStSz_High128,
11043 kIemNativeGstRegUse_ForUpdate);
11044 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
11045 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
11046
11047 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
11048 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
11049
11050 /* Free but don't flush the destination register. */
11051 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
11052 iemNativeRegFreeTmp(pReNative, idxRegTmp);
11053 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
11054
11055 return off;
11056}
11057
11058
11059#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
11060 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
11061
11062
11063/** Emits code for IEM_MC_STORE_YREG_U64. */
11064DECL_INLINE_THROW(uint32_t)
11065iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
11066{
11067 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
11068 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
11069
11070 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
11071 iQwDst < 2
11072 ? kIemNativeGstSimdRegLdStSz_Low128
11073 : kIemNativeGstSimdRegLdStSz_High128,
11074 kIemNativeGstRegUse_ForUpdate);
11075
11076 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
11077
11078 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
11079
11080 /* Free but don't flush the destination register. */
11081 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
11082 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
11083
11084 return off;
11085}
11086
11087
11088#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
11089 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
11090
11091/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
11092DECL_INLINE_THROW(uint32_t)
11093iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
11094{
11095 RT_NOREF(pReNative, iYReg);
11096 /** @todo Needs to be implemented when support for AVX-512 is added. */
11097 return off;
11098}
11099
11100
11101
11102/*********************************************************************************************************************************
11103* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
11104*********************************************************************************************************************************/
11105
11106/**
11107 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
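 *
 * The emitted code loads the guest MXCSR into the first call argument with the
 * exception flags masked out, invokes the helper, merges the returned exception
 * flags back into the guest MXCSR, and finally exits the TB if any unmasked
 * exception flag ends up set so the exception can be raised.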
11108 */
11109DECL_INLINE_THROW(uint32_t)
11110iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs, uint8_t idxInstr)
11111{
11112 /* Grab the MXCSR register; it must not be call-volatile or we would end up freeing it when setting up the call below. */
11113 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
11114 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
11115 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
11116
11117#if 0 /* This is not required right now as the called helper will set up the SSE/AVX state if it is an assembly one. */
11118 /*
11119 * Need to do the FPU preparation.
11120 */
11121 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
11122#endif
11123
11124 /*
11125 * Do all the call setup and cleanup.
11126 */
11127 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
11128 false /*fFlushPendingWrites*/);
11129
11130 /*
11131 * Load the MXCSR register into the first argument and mask out the current exception flags.
11132 */
11133 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
11134 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
11135
11136 /*
11137 * Make the call.
11138 */
11139 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnAImpl);
11140
11141 /*
11142 * The updated MXCSR is in the return register; update the exception status flags.
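 * (The MXCSR exception flags are sticky, so the flags returned by the helper are
 * OR'ed into the previous value rather than replacing it.)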
11143 *
11144 * The return register is marked allocated as a temporary because it is required for the
11145 * exception generation check below.
11146 */
11147 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG)));
11148 uint8_t const idxRegTmp = iemNativeRegMarkAllocated(pReNative, IEMNATIVE_CALL_RET_GREG, kIemNativeWhat_Tmp);
11149 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
11150
11151#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
11152 /* Write back the MXCSR register value (there is no delayed writeback for such registers at the moment). */
11153 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
11154#endif
11155
11156 /*
11157 * Make sure we don't have any outstanding guest register writes as we may
11158 * raise an \#UD or \#XF and all guest register must be up to date in CPUMCTX.
11159 */
11160 off = iemNativeRegFlushPendingWrites(pReNative, off);
11161
11162#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11163 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11164#else
11165 RT_NOREF(idxInstr);
11166#endif
11167
11168 /** @todo r=aeichner ANDN from BMI1 would save us a temporary and an additional instruction here, but I don't
11169 * want to assume the existence of this instruction at the moment. */
11170 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
11171
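    /* The sequence below takes the SSE/AVX floating-point related exception exit if any MXCSR
       exception flag is set whose corresponding mask bit is clear, i.e. if
       (mxcsr & X86_MXCSR_XCPT_FLAGS & ~((mxcsr & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT)) != 0. */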
11172 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp2, idxRegTmp);
11173 /* tmp &= X86_MXCSR_XCPT_MASK */
11174 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
11175 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
11176 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
11177 /* tmp = ~tmp */
11178 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
11179 /* tmp &= mxcsr */
11180 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegTmp2);
11181 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseSseAvxFpRelated>(pReNative, off, idxRegTmp,
11182 X86_MXCSR_XCPT_FLAGS);
11183
11184 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
11185 iemNativeRegFreeTmp(pReNative, idxRegTmp);
11186 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
11187
11188 return off;
11189}
11190
11191
11192#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
11193 off = iemNativeEmitCallSseAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
11194
11195/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
11196DECL_INLINE_THROW(uint32_t)
11197iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
11198{
11199 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11200 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11201 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
11202}
11203
11204
11205#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
11206 off = iemNativeEmitCallSseAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
11207
11208/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
11209DECL_INLINE_THROW(uint32_t)
11210iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
11211 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
11212{
11213 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11214 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11215 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11216 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
11217}
11218
11219
11220/*********************************************************************************************************************************
11221* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
11222*********************************************************************************************************************************/
11223
11224#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
11225 off = iemNativeEmitCallAvxAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
11226
11227/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
11228DECL_INLINE_THROW(uint32_t)
11229iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
11230{
11231 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11232 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11233 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
11234}
11235
11236
11237#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
11238 off = iemNativeEmitCallAvxAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
11239
11240/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
11241DECL_INLINE_THROW(uint32_t)
11242iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
11243 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
11244{
11245 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11246 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11247 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11248 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
11249}
11250
11251
11252#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
11253
11254
11255/*********************************************************************************************************************************
11256* Include instruction emitters. *
11257*********************************************************************************************************************************/
11258#include "target-x86/IEMAllN8veEmit-x86.h"
11259