VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h @ 105877

Last change on this file since 105877 was 105877, checked in by vboxsync, 3 months ago

VMM/IEM: Don't flush PC prior to 64-bit relative jumps, flush it when in the #GP(0) code path. bugref:10720 bugref:10373

1/* $Id: IEMAllN8veRecompFuncs.h 105877 2024-08-27 23:17:09Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused configs.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
82# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
83#endif
84
85
86/*********************************************************************************************************************************
87* Code emitters for flushing pending guest register writes and sanity checks *
88*********************************************************************************************************************************/
89
90#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
91
92# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
93/**
94 * Updates IEMCPU::uPcUpdatingDebug.
95 */
96DECL_INLINE_THROW(uint32_t) iemNativeEmitPcDebugAdd(PIEMRECOMPILERSTATE pReNative, uint32_t off, int64_t offDisp, uint8_t cBits)
97{
98# ifdef RT_ARCH_AMD64
99 if (pReNative->Core.fDebugPcInitialized && cBits >= 32)
100 {
101 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
102 if ((int32_t)offDisp == offDisp || cBits != 64)
103 {
104 /* add [q]word [pVCpu->iem.s.uPcUpdatingDebug], imm32/imm8 */
105 if (cBits == 64)
106 pCodeBuf[off++] = X86_OP_REX_W;
107 pCodeBuf[off++] = (int8_t)offDisp == offDisp ? 0x83 : 0x81;
108 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
109 if ((int8_t)offDisp == offDisp)
110 pCodeBuf[off++] = (int8_t)offDisp;
111 else
112 {
113 *(int32_t *)&pCodeBuf[off] = (int32_t)offDisp;
114 off += sizeof(int32_t);
115 }
116 }
117 else
118 {
119 /* mov tmp0, imm64 */
120 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, offDisp);
121
122 /* add [pVCpu->iem.s.uPcUpdatingDebug], tmp0 */
123 if (cBits == 64)
124 pCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 >= 8 ? X86_OP_REX_R : 0);
125 else if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
126 pCodeBuf[off++] = X86_OP_REX_R;
127 pCodeBuf[off++] = 0x01;
128 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0 & 7,
129 RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
130 }
131 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
132 return off;
133 }
134# endif
135
136 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
137 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 12);
138
139 if (pReNative->Core.fDebugPcInitialized)
140 {
141 Log4(("uPcUpdatingDebug+=%ld cBits=%d off=%#x\n", offDisp, cBits, off));
142 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
143 }
144 else
145 {
146 Log4(("uPcUpdatingDebug=rip+%ld cBits=%d off=%#x\n", offDisp, cBits, off));
147 pReNative->Core.fDebugPcInitialized = true;
148 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
149 }
150
151 if (cBits == 64)
152 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxTmpReg, offDisp, IEMNATIVE_REG_FIXED_TMP0);
153 else
154 {
155 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxTmpReg, (int32_t)offDisp, IEMNATIVE_REG_FIXED_TMP0);
156 if (cBits == 16)
157 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, UINT16_MAX);
158 }
159
160 off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug),
161 IEMNATIVE_REG_FIXED_TMP0);
162
163 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
164 iemNativeRegFreeTmp(pReNative, idxTmpReg);
165 return off;
166}
167
168
169# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
170DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
171{
172 /* Compare the shadow with the context value; they should match. */
173 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
174 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
175 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
176 return off;
177}
178# endif
179
180#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
181
182/**
183 * Flushes delayed write of a specific guest register.
184 *
185 * This must be called prior to calling CImpl functions and any helpers that use
186 * the guest state (such as helpers that raise exceptions).
187 *
188 * This optimization has not yet been implemented. The first target would be
189 * RIP updates, since these are the most common ones.
190 */
191DECL_INLINE_THROW(uint32_t)
192iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
193{
194#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
195 /* If for whatever reason it is possible to reference the PC register at some point, we need to do the writeback here first. */
196#endif
197
198#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
199#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
200 if ( enmClass == kIemNativeGstRegRef_EFlags
201 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
202 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
203#else
204 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
205#endif
206
207 if ( enmClass == kIemNativeGstRegRef_Gpr
208 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
209 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
210#endif
211
212#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
213 if ( enmClass == kIemNativeGstRegRef_XReg
214 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
215 {
216 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
217 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
218 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
219
220 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
221 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
222 }
223#endif
224 RT_NOREF(pReNative, enmClass, idxReg);
225 return off;
226}
227
228
229
230/*********************************************************************************************************************************
231* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
232*********************************************************************************************************************************/
233
234#undef IEM_MC_BEGIN /* unused */
235#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
236 { \
237 Assert(pReNative->Core.bmVars == 0); \
238 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
239 Assert(pReNative->Core.bmStack == 0); \
240 pReNative->fMc = (a_fMcFlags); \
241 pReNative->fCImpl = (a_fCImplFlags); \
242 pReNative->cArgsX = (a_cArgsIncludingHidden)
243
244/** We have to get to the end in recompilation mode, as otherwise we won't
245 * generate code for all the IEM_MC_IF_XXX branches. */
246#define IEM_MC_END() \
247 iemNativeVarFreeAll(pReNative); \
248 } return off
249
250
251
252/*********************************************************************************************************************************
253* Native Emitter Support. *
254*********************************************************************************************************************************/
255
256#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
257
258#define IEM_MC_NATIVE_ELSE() } else {
259
260#define IEM_MC_NATIVE_ENDIF() } ((void)0)
261
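/* Illustrative usage sketch, not taken from this file: a decoder MC block wraps a
 * native emitter in these macros and falls back to the generic threaded statements
 * on hosts without one. The emitter name and variable names below are placeholders.
 */
#if 0
        IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
            IEM_MC_NATIVE_EMIT_2(iemNativeEmit_example_r_r, idxVarDst, idxVarSrc);
        IEM_MC_NATIVE_ELSE()
            /* generic IEM_MC_* statements for the remaining hosts */
        IEM_MC_NATIVE_ENDIF();
#endif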
262
263#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
264 off = a_fnEmitter(pReNative, off)
265
266#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
267 off = a_fnEmitter(pReNative, off, (a0))
268
269#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
270 off = a_fnEmitter(pReNative, off, (a0), (a1))
271
272#define IEM_MC_NATIVE_EMIT_2_EX(a_fnEmitter, a0, a1) \
273 off = a_fnEmitter(pReNative, off, pCallEntry->idxInstr, (a0), (a1))
274
275#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
276 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
277
278#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
279 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
280
281#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
282 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
283
284#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
285 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
286
287#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
288 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
289
290#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
291 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
292
293
294#ifndef RT_ARCH_AMD64
295# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
296#else
297/** @note This is a naive approach that ASSUMES that the register isn't
298 * allocated, so it only works safely for the first allocation(s) in
299 * an MC block. */
300# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
301 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
302
303DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off);
304
305DECL_INLINE_THROW(uint32_t)
306iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
307{
308 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
309 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
310 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
311
312# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
313 /* Must flush the register if it holds pending writes. */
314 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
315 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
316 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
317# endif
318
319 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off);
320 return off;
321}
322
323#endif /* RT_ARCH_AMD64 */
324
325
326
327/*********************************************************************************************************************************
328* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
329*********************************************************************************************************************************/
330
331#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
332 pReNative->fMc = 0; \
333 pReNative->fCImpl = (a_fFlags); \
334 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
335 a_cbInstr) /** @todo not used ... */
336
337
338#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
339 pReNative->fMc = 0; \
340 pReNative->fCImpl = (a_fFlags); \
341 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
342
343DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
344 uint8_t idxInstr, uint64_t a_fGstShwFlush,
345 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
346{
347 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
348}
349
350
351#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
352 pReNative->fMc = 0; \
353 pReNative->fCImpl = (a_fFlags); \
354 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
355 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
356
357DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
358 uint8_t idxInstr, uint64_t a_fGstShwFlush,
359 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
360{
361 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
362}
363
364
365#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
366 pReNative->fMc = 0; \
367 pReNative->fCImpl = (a_fFlags); \
368 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
369 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
370
371DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
372 uint8_t idxInstr, uint64_t a_fGstShwFlush,
373 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
374 uint64_t uArg2)
375{
376 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
377}
378
379
380
381/*********************************************************************************************************************************
382* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
383*********************************************************************************************************************************/
384
385/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
386 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
387DECL_INLINE_THROW(uint32_t)
388iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
389{
390 /*
391 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
392 * return with special status code and make the execution loop deal with
393 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
394 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
395 * could continue w/o interruption, it probably will drop into the
396 * debugger, so it is not worth the effort of trying to service it here and we
397 * just lump it in with the handling of the others.
398 *
399 * To simplify the code and the register state management even more (wrt
400 * immediate in the AND operation), we always update the flags and skip the
401 * extra check and its associated conditional jump.
402 */
403 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
404 <= UINT32_MAX);
405#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
406 AssertMsg( pReNative->idxCurCall == 0
407 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
408 IEMLIVENESSBIT_IDX_EFL_OTHER)),
409 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
410 IEMLIVENESSBIT_IDX_EFL_OTHER)));
411#endif
412
413 /*
414 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
415 * any pending register writes must be flushed.
416 */
417 off = iemNativeRegFlushPendingWrites(pReNative, off);
418
419 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
420 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
421 true /*fSkipLivenessAssert*/);
422 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxEflReg,
423 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
424 kIemNativeLabelType_ReturnWithFlags);
425 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
426 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
427
428 /* Free but don't flush the EFLAGS register. */
429 iemNativeRegFreeTmp(pReNative, idxEflReg);
430
431 return off;
432}
433
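/* A minimal C sketch (compiled out, names are illustrative) of what the code emitted
 * above does at runtime with the combined EFLAGS + internal status value:
 */
#if 0
DECLINLINE(uint32_t) iemExampleFinishInstrFlags(uint32_t fEflAndStatus, bool *pfExitToReturnWithFlags)
{
    /* Exit the TB through the ReturnWithFlags label if anything interesting is pending. */
    *pfExitToReturnWithFlags = RT_BOOL(fEflAndStatus & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK));
    /* Otherwise RF and the interrupt inhibit shadow are unconditionally cleared and stored back. */
    return fEflAndStatus & ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
}
#endif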
434
435/** Helper for iemNativeEmitFinishInstructionWithStatus. */
436DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
437{
438 unsigned const offOpcodes = pCallEntry->offOpcode;
439 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
440 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
441 {
442 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
443 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
444 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
445 }
446 AssertFailedReturn(NIL_RTGCPHYS);
447}
448
449
450/** The VINF_SUCCESS dummy. */
451template<int const a_rcNormal, bool const a_fIsJump>
452DECL_FORCE_INLINE_THROW(uint32_t)
453iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
454 int32_t const offJump)
455{
456 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
457 if (a_rcNormal != VINF_SUCCESS)
458 {
459#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
460 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
461#else
462 RT_NOREF_PV(pCallEntry);
463#endif
464
465 /* As this code returns from the TB any pending register writes must be flushed. */
466 off = iemNativeRegFlushPendingWrites(pReNative, off);
467
468 /*
469 * If we're in a conditional, mark the current branch as exiting so we
470 * can disregard its state when we hit the IEM_MC_ENDIF.
471 */
472 iemNativeMarkCurCondBranchAsExiting(pReNative);
473
474 /*
475 * Use the lookup table for getting to the next TB quickly.
476 * Note! In this code path there can only be one entry at present.
477 */
478 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
479 PCIEMTB const pTbOrg = pReNative->pTbOrg;
480 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
481 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
482
483#if 0
484 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
485 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
486 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
487 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
488 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
489
490 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
491
492#else
493 /* Load the index as argument #1 for the helper call at the given label. */
494 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
495
496 /*
497 * Figure out the physical address of the current instruction and see
498 * whether the next instruction we're about to execute is in the same
499 * page so we can optimistically skip TLB loading.
500 *
501 * - This is safe for all cases in FLAT mode.
502 * - In segmented modes it is complicated, given that a negative
503 * jump may underflow EIP and a forward jump may overflow or run into
504 * CS.LIM, triggering a #GP. The only thing we can get away with
505 * now at compile time is forward jumps w/o CS.LIM checks, since the
506 * lack of CS.LIM checks means we're good for the entire physical page
507 * we're executing on and another 15 bytes before we run into CS.LIM.
508 */
509 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
510# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
511 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
512# endif
513 )
514 {
515 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
516 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
517 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
518 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
519
520 {
521 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
522 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
523
524 /* Load the key lookup flags into the 2nd argument for the helper call.
525 - This is safe wrt CS limit checking since we're only here for FLAT modes.
526 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
527 interrupt shadow.
528 - The NMI inhibiting is more questionable, though... */
529 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
530 * Should we copy it into fExec to simplify this? OTOH, it's just a
531 * couple of extra instructions if EFLAGS are already in a register. */
532 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
533 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
534
535 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
536 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookup);
537 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithIrq);
538 }
539 }
540 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
541 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlb);
542 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq);
543#endif
544 }
545 return off;
546}
547
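/* Worked example of the same-page test above (the numbers are made up for illustration):
 *      GCPhysPcCurrent = 0x0000000104567ffd, cbOpcode = 2, offJump = +0x10
 *   -> GCPhysPcNext    = 0x000000010456800f
 * The guest pages differ (0x104567 vs 0x104568), so the ...WithTlb exit labels are taken
 * and the code TLB is reloaded; only when both addresses share a page can the TLB load
 * be skipped via ReturnBreakViaLookup[WithIrq]. */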
548
549#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
550 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
551 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
552
553#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
554 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
555 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
556 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
557
558/** Same as iemRegAddToRip64AndFinishingNoFlags. */
559DECL_INLINE_THROW(uint32_t)
560iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
561{
562#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
563# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
564 if (!pReNative->Core.offPc)
565 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
566# endif
567
568 /* Allocate a temporary PC register. */
569 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
570
571 /* Perform the addition and store the result. */
572 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
573 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
574
575 /* Free but don't flush the PC register. */
576 iemNativeRegFreeTmp(pReNative, idxPcReg);
577#endif
578
579#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
580 pReNative->Core.offPc += cbInstr;
581 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
582# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
583 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 64);
584 off = iemNativeEmitPcDebugCheck(pReNative, off);
585# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
586 off = iemNativePcAdjustCheck(pReNative, off);
587# endif
588 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
589#endif
590
591 return off;
592}
593
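/* Note (illustrative summary): with IEMNATIVE_WITH_DELAYED_PC_UPDATING the common case
 * above emits no code at all; the instruction length is merely accumulated in Core.offPc
 * (e.g. three 3-byte instructions leave Core.offPc == 9) and a single RIP writeback is
 * emitted later, when an up-to-date PC is actually required. */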
594
595#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
596 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
597 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
598
599#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
600 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
601 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
602 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
603
604/** Same as iemRegAddToEip32AndFinishingNoFlags. */
605DECL_INLINE_THROW(uint32_t)
606iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
607{
608#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
609# ifdef IEMNATIVE_REG_FIXED_PC_DBG
610 if (!pReNative->Core.offPc)
611 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
612# endif
613
614 /* Allocate a temporary PC register. */
615 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
616
617 /* Perform the addition and store the result. */
618 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
619 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
620
621 /* Free but don't flush the PC register. */
622 iemNativeRegFreeTmp(pReNative, idxPcReg);
623#endif
624
625#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
626 pReNative->Core.offPc += cbInstr;
627 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
628# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
629 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 32);
630 off = iemNativeEmitPcDebugCheck(pReNative, off);
631# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
632 off = iemNativePcAdjustCheck(pReNative, off);
633# endif
634 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
635#endif
636
637 return off;
638}
639
640
641#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
642 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
643 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
644
645#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
646 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
647 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
648 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
649
650/** Same as iemRegAddToIp16AndFinishingNoFlags. */
651DECL_INLINE_THROW(uint32_t)
652iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
653{
654#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
655# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
656 if (!pReNative->Core.offPc)
657 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
658# endif
659
660 /* Allocate a temporary PC register. */
661 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
662
663 /* Perform the addition and store the result. */
664 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
665 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
666 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
667
668 /* Free but don't flush the PC register. */
669 iemNativeRegFreeTmp(pReNative, idxPcReg);
670#endif
671
672#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
673 pReNative->Core.offPc += cbInstr;
674 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
675# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
676 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 16);
677 off = iemNativeEmitPcDebugCheck(pReNative, off);
678# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
679 off = iemNativePcAdjustCheck(pReNative, off);
680# endif
681 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
682#endif
683
684 return off;
685}
686
687
688/*********************************************************************************************************************************
689* Common code for changing PC/RIP/EIP/IP. *
690*********************************************************************************************************************************/
691
692/**
693 * Emits code to check if the content of @a idxAddrReg is a canonical address,
694 * raising a \#GP(0) if it isn't.
695 *
696 * @returns New code buffer offset, UINT32_MAX on failure.
697 * @param pReNative The native recompile state.
698 * @param off The code buffer offset.
699 * @param idxAddrReg The host register with the address to check.
700 * @param idxInstr The current instruction.
701 */
702DECL_FORCE_INLINE_THROW(uint32_t)
703iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
704{
705 /*
706 * Make sure we don't have any outstanding guest register writes as we may
707 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
708 */
709 off = iemNativeRegFlushPendingWrites(pReNative, off);
710
711#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
712 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
713#else
714 RT_NOREF(idxInstr);
715#endif
716
717#ifdef RT_ARCH_AMD64
718 /*
719 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
720 * return raisexcpt();
721 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
722 */
723 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
724
725 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
726 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
727 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
728 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
729 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
730
731 iemNativeRegFreeTmp(pReNative, iTmpReg);
732
733#elif defined(RT_ARCH_ARM64)
734 /*
735 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
736 * return raisexcpt();
737 * ----
738 * mov x1, 0x800000000000
739 * add x1, x0, x1
740 * cmp xzr, x1, lsr 48
741 * b.ne .Lraisexcpt
742 */
743 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
744
745 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
746 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
747 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
748 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
749
750 iemNativeRegFreeTmp(pReNative, iTmpReg);
751
752#else
753# error "Port me"
754#endif
755 return off;
756}
757
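/* Compiled-out helper sketch (hypothetical name) showing the arithmetic the emitted code
 * performs: an address is canonical when bits 63:47 are all equal, which the biased add
 * detects without needing a 64-bit immediate on AMD64:
 */
#if 0
DECLINLINE(bool) iemExampleIsCanonicalAddr(uint64_t uAddr)
{
    /* 0x00007fffffffffff: high dword 0x00007fff + 0x8000 = 0x0000ffff -> canonical.
       0x0000800000000000: high dword 0x00008000 + 0x8000 = 0x00010000 -> #GP(0) path. */
    return (((uint32_t)(uAddr >> 32) + UINT32_C(0x8000)) >> 16) == 0;
}
#endif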
758
759/**
760 * Emits code to check if the content of @a idxAddrReg is a canonical address,
761 * raising a \#GP(0) if it isn't.
762 *
763 * Caller makes sure everything is flushed, except maybe PC.
764 *
765 * @returns New code buffer offset, UINT32_MAX on failure.
766 * @param pReNative The native recompile state.
767 * @param off The code buffer offset.
768 * @param idxAddrReg The host register with the address to check.
769 * @param offDisp The relative displacement that has already been
770 * added to idxAddrReg and must be subtracted if
771 * raising a \#GP(0).
772 * @param idxInstr The current instruction.
773 */
774DECL_FORCE_INLINE_THROW(uint32_t)
775iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
776 uint8_t idxAddrReg, int64_t offDisp, uint8_t idxInstr)
777{
778#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
779 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
780#endif
781
782#ifdef RT_ARCH_AMD64
783 /*
784 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
785 * return raisexcpt();
786 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
787 */
788 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
789
790 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
791 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
792 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
793 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
794
795#elif defined(RT_ARCH_ARM64)
796 /*
797 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
798 * return raisexcpt();
799 * ----
800 * mov x1, 0x800000000000
801 * add x1, x0, x1
802 * cmp xzr, x1, lsr 48
803 * b.ne .Lraisexcpt
804 */
805 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
806
807 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
808 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
809 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
810#else
811# error "Port me"
812#endif
813
814 /* Jump to the #GP code (hoping static prediction considers forward branches as not-taken). */
815 uint32_t const offFixup1 = off;
816 off = iemNativeEmitJnzToFixed(pReNative, off, off /*8-bit jump suffices*/);
817
818 /* jump .Lnoexcept; Skip the #GP code. */
819 uint32_t const offFixup2 = off;
820 off = iemNativeEmitJmpToFixed(pReNative, off, off /*8-bit jump suffices*/);
821
822 /* .Lraisexcpt: */
823 iemNativeFixupFixedJump(pReNative, offFixup1, off);
824#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
825 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
826#else
827 RT_NOREF(idxInstr);
828#endif
829
830 /* Undo the PC adjustment and store the old PC value. */
831 off = iemNativeEmitSubGprImm(pReNative, off, idxAddrReg, offDisp, iTmpReg);
832 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxAddrReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
833
834 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0, false /*fActuallyExitingTb*/);
835
836 /* .Lnoexcept: */
837 iemNativeFixupFixedJump(pReNative, offFixup2, off);
838
839 iemNativeRegFreeTmp(pReNative, iTmpReg);
840 return off;
841}
842
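/* Sketch of the #GP(0) path above: the displacement has already been folded into
 * idxAddrReg, so before exiting the TB the original RIP is recreated and written back,
 * roughly cpum.GstCtx.rip = idxAddrReg - offDisp. This is what lets callers skip
 * flushing the PC before 64-bit relative jumps and only flush it on the exception path. */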
843
844/**
845 * Emits code to check if the content of @a idxAddrReg is a canonical address,
846 * raising a \#GP(0) if it isn't.
847 *
848 * Caller makes sure everything is flushed, except maybe PC.
849 *
850 * @returns New code buffer offset, UINT32_MAX on failure.
851 * @param pReNative The native recompile state.
852 * @param off The code buffer offset.
853 * @param idxAddrReg The host register with the address to check.
854 * @param idxOldPcReg Register holding the old PC that offPc is relative
855 * to if available, otherwise UINT8_MAX.
856 * @param idxInstr The current instruction.
857 */
858DECL_FORCE_INLINE_THROW(uint32_t)
859iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
860 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
861{
862#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
863 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
864#endif
865
866#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
867# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
868 if (!pReNative->Core.offPc)
869# endif
870 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
871#else
872 RT_NOREF(idxInstr);
873#endif
874
875#ifdef RT_ARCH_AMD64
876 /*
877 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
878 * return raisexcpt();
879 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
880 */
881 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
882
883 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
884 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
885 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
886 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
887
888#elif defined(RT_ARCH_ARM64)
889 /*
890 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
891 * return raisexcpt();
892 * ----
893 * mov x1, 0x800000000000
894 * add x1, x0, x1
895 * cmp xzr, x1, lsr 48
896 * b.ne .Lraisexcpt
897 */
898 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
899
900 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
901 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
902 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
903#else
904# error "Port me"
905#endif
906
907#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
908 if (pReNative->Core.offPc)
909 {
910 /** @todo On x86, it is said that conditional jumps forward are statically
911 * predicted as not taken, so this isn't a very good construct.
912 * Investigate whether it makes sense to invert it and add another
913 * jump. Also, find out what the static predictor does here on arm! */
914 uint32_t const offFixup = off;
915 off = iemNativeEmitJzToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
916
917 /* .Lraisexcpt: */
918# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
919 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
920# endif
921 /* We need to update cpum.GstCtx.rip. */
922 if (idxOldPcReg == UINT8_MAX)
923 {
924 idxOldPcReg = iTmpReg;
925 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
926 }
927 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
928 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
929
930 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0, false /*fActuallyExitingTb*/);
931 iemNativeFixupFixedJump(pReNative, offFixup, off);
932 }
933 else
934#endif
935 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
936
937 iemNativeRegFreeTmp(pReNative, iTmpReg);
938
939 return off;
940}
941
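/* Sketch of the delayed-PC handling above: when Core.offPc is non-zero the CPUMCTX copy
 * of RIP is stale, so the #GP(0) path first reconstructs it (old PC + Core.offPc) and
 * stores it before taking the RaiseGp0 exit; with no delayed update pending the code can
 * jump straight to the exit label instead. */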
942
943/**
944 * Emits code to check that the content of @a idxAddrReg is within the limit
945 * of CS, raising a \#GP(0) if it isn't.
946 *
947 * @returns New code buffer offset; throws VBox status code on error.
948 * @param pReNative The native recompile state.
949 * @param off The code buffer offset.
950 * @param idxAddrReg The host register (32-bit) with the address to
951 * check.
952 * @param idxInstr The current instruction.
953 */
954DECL_FORCE_INLINE_THROW(uint32_t)
955iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
956 uint8_t idxAddrReg, uint8_t idxInstr)
957{
958 /*
959 * Make sure we don't have any outstanding guest register writes as we may
960 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
961 */
962 off = iemNativeRegFlushPendingWrites(pReNative, off);
963
964#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
965 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
966#else
967 RT_NOREF(idxInstr);
968#endif
969
970 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
971 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
972 kIemNativeGstRegUse_ReadOnly);
973
974 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
975 off = iemNativeEmitJaTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
976
977 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
978 return off;
979}
980
981
982
983
984/**
985 * Emits code to check that the content of @a idxAddrReg is within the limit
986 * of CS, raising a \#GP(0) if it isn't.
987 *
988 * Caller makes sure everything is flushed, except maybe PC.
989 *
990 * @returns New code buffer offset; throws VBox status code on error.
991 * @param pReNative The native recompile state.
992 * @param off The code buffer offset.
993 * @param idxAddrReg The host register (32-bit) with the address to
994 * check.
995 * @param idxOldPcReg Register holding the old PC that offPc is relative
996 * to if available, otherwise UINT8_MAX.
997 * @param idxInstr The current instruction.
998 */
999DECL_FORCE_INLINE_THROW(uint32_t)
1000iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1001 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
1002{
1003#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1004 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1005#endif
1006
1007#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1008# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1009 if (!pReNative->Core.offPc)
1010# endif
1011 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1012#else
1013 RT_NOREF(idxInstr);
1014#endif
1015
1016 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
1017 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
1018 kIemNativeGstRegUse_ReadOnly);
1019
1020 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
1021#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1022 if (pReNative->Core.offPc)
1023 {
1024 uint32_t const offFixup = off;
1025 off = iemNativeEmitJbeToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
1026
1027 /* Raising a GP(0), but first we need to update cpum.GstCtx.rip. */
1028 if (idxOldPcReg == UINT8_MAX)
1029 {
1030 idxOldPcReg = idxAddrReg;
1031 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1032 }
1033 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
1034 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1035# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1036 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1037# endif
1038 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0, false /*fActuallyExitingTb*/);
1039 iemNativeFixupFixedJump(pReNative, offFixup, off);
1040 }
1041 else
1042#endif
1043 off = iemNativeEmitJaTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
1044
1045 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1046 return off;
1047}
1048
1049
1050/*********************************************************************************************************************************
1051* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
1052*********************************************************************************************************************************/
1053
1054#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1055 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1056 (a_enmEffOpSize), pCallEntry->idxInstr); \
1057 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1058
1059#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1060 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1061 (a_enmEffOpSize), pCallEntry->idxInstr); \
1062 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1063 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1064
1065#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
1066 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1067 IEMMODE_16BIT, pCallEntry->idxInstr); \
1068 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1069
1070#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1071 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1072 IEMMODE_16BIT, pCallEntry->idxInstr); \
1073 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1074 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1075
1076#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
1077 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1078 IEMMODE_64BIT, pCallEntry->idxInstr); \
1079 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1080
1081#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1082 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1083 IEMMODE_64BIT, pCallEntry->idxInstr); \
1084 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1085 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1086
1087
1088#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1089 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1090 (a_enmEffOpSize), pCallEntry->idxInstr); \
1091 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1092
1093#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1094 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1095 (a_enmEffOpSize), pCallEntry->idxInstr); \
1096 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1097 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1098
1099#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG(a_i16, a_cbInstr, a_rcNormal) \
1100 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1101 IEMMODE_16BIT, pCallEntry->idxInstr); \
1102 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1103
1104#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1105 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1106 IEMMODE_16BIT, pCallEntry->idxInstr); \
1107 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1108 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1109
1110#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG(a_i32, a_cbInstr, a_rcNormal) \
1111 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1112 IEMMODE_64BIT, pCallEntry->idxInstr); \
1113 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1114
1115#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1116 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1117 IEMMODE_64BIT, pCallEntry->idxInstr); \
1118 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1119 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1120
1121/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
1122 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
1123 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
1124template<bool const a_fWithinPage>
1125DECL_INLINE_THROW(uint32_t)
1126iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1127 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1128{
1129 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
1130#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1131 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1132 if (a_fWithinPage && enmEffOpSize == IEMMODE_64BIT)
1133 {
1134 /* No #GP checking required, just update offPc and get on with it. */
1135 pReNative->Core.offPc += (int64_t)offDisp + cbInstr;
1136# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1137 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1138# endif
1139 }
1140 else
1141#endif
1142 {
1143 /* Flush all but PC iff we're doing a 64-bit update here and this isn't within a page. */
1144 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT && !a_fWithinPage))
1145 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1146
1147 /* Allocate a temporary PC register. */
1148 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1149 kIemNativeGstRegUse_ForUpdate);
1150
1151 /* Perform the addition. */
1152 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr + pReNative->Core.offPc);
1153
1154 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
1155 {
1156 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't.
1157 We can skip this if the target is within the same page. */
1158 if (!a_fWithinPage)
1159 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(pReNative, off, idxPcReg,
1160 (int64_t)offDisp + cbInstr, idxInstr);
1161 }
1162 else
1163 {
1164 /* Just truncate the result to 16-bit IP. */
1165 Assert(enmEffOpSize == IEMMODE_16BIT);
1166 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1167 }
1168
1169#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1170# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1171 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1172 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1173# endif
1174 /* Since we've already got the new PC value in idxPcReg, we can just as
1175 well write it out and reset offPc to zero. Otherwise, we'd need to use
1176 a copy of the shadow PC, which will cost another move instruction here. */
1177 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
1178 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
1179 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
1180 Log4(("iemNativeEmitRip64RelativeJumpAndFinishingNoFlags: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u cCondDepth=%d\n",
1181 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped, pReNative->cCondDepth));
1182 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
1183# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1184 iemNativeDbgInfoAddNativeOffset(pReNative, off);
1185 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
1186# endif
1187 pReNative->Core.offPc = 0;
1188#endif
1189
1190 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1191
1192 /* Free but don't flush the PC register. */
1193 iemNativeRegFreeTmp(pReNative, idxPcReg);
1194 }
1195 return off;
1196}
1197
1198
1199#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1200 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1201 (a_enmEffOpSize), pCallEntry->idxInstr); \
1202 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1203
1204#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1205 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1206 (a_enmEffOpSize), pCallEntry->idxInstr); \
1207 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1208 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1209
1210#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
1211 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1212 IEMMODE_16BIT, pCallEntry->idxInstr); \
1213 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1214
1215#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1216 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1217 IEMMODE_16BIT, pCallEntry->idxInstr); \
1218 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1219 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1220
1221#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
1222 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1223 IEMMODE_32BIT, pCallEntry->idxInstr); \
1224 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1225
1226#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1227 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1228 IEMMODE_32BIT, pCallEntry->idxInstr); \
1229 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1230 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1231
1232
1233#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1234 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1235 (a_enmEffOpSize), pCallEntry->idxInstr); \
1236 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1237
1238#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1239 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1240 (a_enmEffOpSize), pCallEntry->idxInstr); \
1241 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1242 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1243
1244#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT(a_i16, a_cbInstr, a_rcNormal) \
1245 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1246 IEMMODE_16BIT, pCallEntry->idxInstr); \
1247 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1248
1249#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1250 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1251 IEMMODE_16BIT, pCallEntry->idxInstr); \
1252 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1253 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1254
1255#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT(a_i32, a_cbInstr, a_rcNormal) \
1256 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1257 IEMMODE_32BIT, pCallEntry->idxInstr); \
1258 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1259
1260#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1261 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1262 IEMMODE_32BIT, pCallEntry->idxInstr); \
1263 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1264 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1265
1266/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
1267 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
1268 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
1269template<bool const a_fFlat>
1270DECL_INLINE_THROW(uint32_t)
1271iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1272 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1273{
1274 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
1275#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1276 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1277#endif
1278
1279 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1280 if (!a_fFlat || enmEffOpSize == IEMMODE_16BIT)
1281 {
1282 off = iemNativeRegFlushPendingWrites(pReNative, off);
1283#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1284 Assert(pReNative->Core.offPc == 0);
1285#endif
1286 }
1287
1288 /* Allocate a temporary PC register. */
1289 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1290
1291 /* Perform the addition. */
1292#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1293 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr + (int32_t)pReNative->Core.offPc);
1294#else
1295    off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
1296#endif
1297
1298 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
1299 if (enmEffOpSize == IEMMODE_16BIT)
1300 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1301
1302 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
1303 if (!a_fFlat)
1304 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1305
1306 /* Commit it. */
1307#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1308 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, enmEffOpSize == IEMMODE_32BIT ? 32 : 16);
1309 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1310#endif
1311
1312 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1313#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1314 pReNative->Core.offPc = 0;
1315#endif
1316
1317 /* Free but don't flush the PC register. */
1318 iemNativeRegFreeTmp(pReNative, idxPcReg);
1319
1320 return off;
1321}
1322
1323
1324#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
1325 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1326 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1327
1328#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
1329 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1330 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1331 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1332
1333#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
1334 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1335 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1336
1337#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1338 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1339 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1340 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1341
1342#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
1343 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1344 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1345
1346#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1347 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1348 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1349 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1350
1351/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
1352DECL_INLINE_THROW(uint32_t)
1353iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1354 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
1355{
1356 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1357 off = iemNativeRegFlushPendingWrites(pReNative, off);
1358
1359#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1360 Assert(pReNative->Core.offPc == 0);
1361 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1362#endif
1363
1364 /* Allocate a temporary PC register. */
1365 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1366
1367 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
1368 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
1369 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1370 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1371#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1372 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, 16);
1373 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1374#endif
1375 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1376
1377 /* Free but don't flush the PC register. */
1378 iemNativeRegFreeTmp(pReNative, idxPcReg);
1379
1380 return off;
1381}
1382
1383
1384
1385/*********************************************************************************************************************************
1386* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
1387*********************************************************************************************************************************/
1388
1389/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
1390#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
1391 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1392
1393/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
1394#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
1395 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1396
1397/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
1398#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
1399 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1400
1401/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
1402 * clears flags. */
1403#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
1404 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
1405 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1406
1407/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
1408 * clears flags. */
1409#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
1410 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
1411 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1412
1413/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
1414 * clears flags. */
1415#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
1416 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
1417 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1418
1419#undef IEM_MC_SET_RIP_U16_AND_FINISH
1420
1421
1422/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
1423#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
1424 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1425
1426/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
1427#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
1428 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1429
1430/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
1431 * clears flags. */
1432#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
1433 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
1434 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1435
1436/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
1437 * and clears flags. */
1438#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
1439 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
1440 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1441
1442#undef IEM_MC_SET_RIP_U32_AND_FINISH
1443
1444
1445/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
1446#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
1447 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1448
1449/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
1450 * and clears flags. */
1451#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
1452 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
1453 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1454
1455#undef IEM_MC_SET_RIP_U64_AND_FINISH
1456
1457
1458/** Same as iemRegRipJumpU16AndFinishNoFlags,
1459 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
1460DECL_INLINE_THROW(uint32_t)
1461iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
1462 uint8_t idxInstr, uint8_t cbVar)
1463{
1464 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1465 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1466
1467    /* If we can't rule out a #GP(0) below, flush all dirty registers except for
1468       the PC, which will be handled specially by the two workers below if they raise a #GP(0). */
1469 bool const fMayRaiseGp0 = (f64Bit && cbVar > sizeof(uint32_t)) || (!f64Bit && !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1470 uint8_t const idxOldPcReg = fMayRaiseGp0
1471 ? iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc)
1472 : UINT8_MAX;
1473 if (fMayRaiseGp0)
1474 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1475
1476 /* Get a register with the new PC loaded from idxVarPc.
1477       Note! This ASSUMES that the high bits of the GPR are zeroed. */
1478 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
1479
1480 /* Check that the target is within CS.LIM / is canonical (may #GP(0) + exit TB). */
1481 if (fMayRaiseGp0)
1482 {
1483 if (f64Bit)
1484 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1485 else
1486 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1487 }
1488
1489 /* Store the result. */
1490 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1491
1492#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1493 pReNative->Core.offPc = 0;
1494 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1495# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1496 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1497 pReNative->Core.fDebugPcInitialized = true;
1498 Log4(("uPcUpdatingDebug=rip off=%#x\n", off));
1499# endif
1500#endif
1501
1502 if (idxOldPcReg != UINT8_MAX)
1503 iemNativeRegFreeTmp(pReNative, idxOldPcReg);
1504 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1505    /** @todo implicitly free the variable? */
1506
1507 return off;
1508}
1509
1510
1511
1512/*********************************************************************************************************************************
1513* Emitters for changing PC/RIP/EIP/IP with an indirect or relative call (IEM_MC_IND_CALL_UXX_AND_FINISH and IEM_MC_REL_CALL_SXX_AND_FINISH) (requires stack emitters). *
1514*********************************************************************************************************************************/
1515
1516/** @todo These helpers naturally belong to the stack push API, but we already need them up here (we could of course move
1517 * them below the stack emitters, but then they would no longer be close to the rest of the PC/RIP handling...). */
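/** Emits code that decrements the stack pointer by @a cbMem using 16-bit (SP)
 *  arithmetic, loading the resulting effective stack address zero-extended into
 *  @a idxRegEffSp and updating only bits 15:0 of @a idxRegRsp. */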
1518DECL_FORCE_INLINE_THROW(uint32_t)
1519iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1520{
1521 /* Use16BitSp: */
1522#ifdef RT_ARCH_AMD64
1523 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1524 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1525#else
1526 /* sub regeff, regrsp, #cbMem */
1527 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
1528 /* and regeff, regeff, #0xffff */
1529 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1530 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
1531    /* bfi regrsp, regeff, #0, #16 - copies bits 15:0 from idxRegEffSp into idxRegRsp bits 15:0, keeping the other RSP bits as is. */
1532 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
1533#endif
1534 return off;
1535}
1536
1537
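/** Emits code that decrements the stack pointer by @a cbMem using 32-bit (ESP)
 *  arithmetic and copies the result into @a idxRegEffSp. */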
1538DECL_FORCE_INLINE(uint32_t)
1539iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1540{
1541 /* Use32BitSp: */
1542 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
1543 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1544 return off;
1545}
1546
1547
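/** Worker that pushes the value in @a idxRegPc onto the guest stack.
 *
 *  @a cBitsVarAndFlat is packed via RT_MAKE_U32_FROM_U8: the first byte is the
 *  width in bits of the value being pushed, the second byte is 0 for segmented
 *  stack accesses or 32/64 in the flat modes, and the third byte is non-zero
 *  when pushing a segment register (see the asserts and RT_BYTE usage below). */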
1548DECL_INLINE_THROW(uint32_t)
1549iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
1550 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
1551{
1552 /*
1553 * Assert sanity.
1554 */
1555#ifdef VBOX_STRICT
1556 if (RT_BYTE2(cBitsVarAndFlat) != 0)
1557 {
1558 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
1559 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
1560 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
1561 Assert( pfnFunction
1562 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1563 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
1564 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1565 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
1566 : UINT64_C(0xc000b000a0009000) ));
1567 }
1568 else
1569 Assert( pfnFunction
1570 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
1571 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
1572 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
1573 : UINT64_C(0xc000b000a0009000) ));
1574#endif
1575
1576#ifdef VBOX_STRICT
1577 /*
1578 * Check that the fExec flags we've got make sense.
1579 */
1580 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1581#endif
1582
1583 /*
1584 * To keep things simple we have to commit any pending writes first as we
1585 * may end up making calls.
1586 */
1587 /** @todo we could postpone this till we make the call and reload the
1588 * registers after returning from the call. Not sure if that's sensible or
1589 * not, though. */
1590 off = iemNativeRegFlushPendingWrites(pReNative, off);
1591
1592 /*
1593 * First we calculate the new RSP and the effective stack pointer value.
1594 * For 64-bit mode and flat 32-bit these two are the same.
1595 * (Code structure is very similar to that of PUSH)
1596 */
1597 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
1598 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
1599 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
1600 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
1601 ? cbMem : sizeof(uint16_t);
1602 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
1603 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1604 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1605 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1606 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1607 if (cBitsFlat != 0)
1608 {
1609 Assert(idxRegEffSp == idxRegRsp);
1610 Assert(cBitsFlat == 32 || cBitsFlat == 64);
1611 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1612 if (cBitsFlat == 64)
1613 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1614 else
1615 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1616 }
1617 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1618 {
1619 Assert(idxRegEffSp != idxRegRsp);
1620 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1621 kIemNativeGstRegUse_ReadOnly);
1622#ifdef RT_ARCH_AMD64
1623 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1624#else
1625 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1626#endif
1627 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1628 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1629 offFixupJumpToUseOtherBitSp = off;
1630 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1631 {
1632 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1633 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1634 }
1635 else
1636 {
1637 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1638 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1639 }
1640 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1641 }
1642 /* SpUpdateEnd: */
1643 uint32_t const offLabelSpUpdateEnd = off;
1644
1645 /*
1646 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1647 * we're skipping lookup).
1648 */
1649 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1650 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
1651 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1652 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1653 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1654 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1655 : UINT32_MAX;
1656 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1657
1658
1659 if (!TlbState.fSkip)
1660 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1661 else
1662 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1663
1664 /*
1665 * Use16BitSp:
1666 */
1667 if (cBitsFlat == 0)
1668 {
1669#ifdef RT_ARCH_AMD64
1670 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1671#else
1672 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1673#endif
1674 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1675 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1676 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1677 else
1678 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1679 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1680 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1681 }
1682
1683 /*
1684 * TlbMiss:
1685 *
1686 * Call helper to do the pushing.
1687 */
1688 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1689
1690#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1691 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1692#else
1693 RT_NOREF(idxInstr);
1694#endif
1695
1696 /* Save variables in volatile registers. */
1697 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1698 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1699 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1700 | (RT_BIT_32(idxRegPc));
1701 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1702
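    /* Set up the helper arguments: idxRegEffSp must end up in ARG1 and idxRegPc in ARG2,
       taking care not to overwrite either before it has been read (ARG0 is only used as a
       scratch register for the swap case and is loaded with pVCpu afterwards). */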
1703 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1704 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1705 {
1706 /* Swap them using ARG0 as temp register: */
1707 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1708 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1709 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1710 }
1711 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1712 {
1713 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1714 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1715
1716 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1717 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1718 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1719 }
1720 else
1721 {
1722 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1723 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1724
1725 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1726 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1727 }
1728
1729 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1730 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1731
1732 /* Done setting up parameters, make the call. */
1733 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1734
1735 /* Restore variables and guest shadow registers to volatile registers. */
1736 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1737 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1738
1739#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1740 if (!TlbState.fSkip)
1741 {
1742 /* end of TlbMiss - Jump to the done label. */
1743 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1744 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1745
1746 /*
1747 * TlbLookup:
1748 */
1749 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
1750 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1751
1752 /*
1753 * Emit code to do the actual storing / fetching.
1754 */
1755 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1756# ifdef IEM_WITH_TLB_STATISTICS
1757 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1758 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1759# endif
1760 switch (cbMemAccess)
1761 {
1762 case 2:
1763 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1764 break;
1765 case 4:
1766 if (!fIsIntelSeg)
1767 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1768 else
1769 {
1770                    /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
1771                       PUSH FS in real mode, so we have to try to emulate that here.
1772                       We borrow the now unused idxReg1 from the TLB lookup code for this. */
1773 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
1774 kIemNativeGstReg_EFlags);
1775 if (idxRegEfl != UINT8_MAX)
1776 {
1777#ifdef RT_ARCH_AMD64
1778 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
1779 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1780 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1781#else
1782 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
1783 off, TlbState.idxReg1, idxRegEfl,
1784 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1785#endif
1786 iemNativeRegFreeTmp(pReNative, idxRegEfl);
1787 }
1788 else
1789 {
1790 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
1791 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
1792 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1793 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1794 }
1795 /* ASSUMES the upper half of idxRegPc is ZERO. */
1796 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
1797 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
1798 }
1799 break;
1800 case 8:
1801 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1802 break;
1803 default:
1804 AssertFailed();
1805 }
1806
1807 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1808 TlbState.freeRegsAndReleaseVars(pReNative);
1809
1810 /*
1811 * TlbDone:
1812 *
1813 * Commit the new RSP value.
1814 */
1815 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1816 }
1817#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1818
1819#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1820 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1821#endif
1822 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1823 if (idxRegEffSp != idxRegRsp)
1824 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1825
1826 return off;
1827}
1828
1829
1830/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1831#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1832 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1833
1834/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1835 * clears flags. */
1836#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1837 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1838 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1839
1840/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1841#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1842 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1843
1844/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1845 * clears flags. */
1846#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1847 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1848 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1849
1850#undef IEM_MC_IND_CALL_U16_AND_FINISH
1851
1852
1853/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1854#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1855 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1856
1857/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1858 * clears flags. */
1859#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1860 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1861 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1862
1863#undef IEM_MC_IND_CALL_U32_AND_FINISH
1864
1865
1866/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1867 * an extra parameter, for use in 64-bit code. */
1868#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1869 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1870
1871
1872/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1873 * an extra parameter, for use in 64-bit code and we need to check and clear
1874 * flags. */
1875#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1876 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1877 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1878
1879#undef IEM_MC_IND_CALL_U64_AND_FINISH
1880
1881/** Worker for the IEM_MC_IND_CALL_UXX_AND_FINISH_THREADED_PCXX macros above
1882 * (the 16-bit, 32-bit and 64-bit indirect call variants). */
1883DECL_INLINE_THROW(uint32_t)
1884iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1885 uint8_t idxInstr, uint8_t cbVar)
1886{
1887 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1888 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1889
1890 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1891 off = iemNativeRegFlushPendingWrites(pReNative, off);
1892
1893#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1894 Assert(pReNative->Core.offPc == 0);
1895 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1896#endif
1897
1898 /* Get a register with the new PC loaded from idxVarPc.
1899       Note! This ASSUMES that the high bits of the GPR are zeroed. */
1900 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1901
1902 /* Check limit (may #GP(0) + exit TB). */
1903 if (!f64Bit)
1904/** @todo we can skip this test in FLAT 32-bit mode. */
1905 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1906 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1907 else if (cbVar > sizeof(uint32_t))
1908 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1909
1910#if 1
1911 /* Allocate a temporary PC register, we don't want it shadowed. */
1912 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1913 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1914#else
1915 /* Allocate a temporary PC register. */
1916 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1917 true /*fNoVolatileRegs*/);
1918#endif
1919
1920 /* Perform the addition and push the variable to the guest stack. */
1921 /** @todo Flat variants for PC32 variants. */
1922 switch (cbVar)
1923 {
1924 case sizeof(uint16_t):
1925 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1926 /* Truncate the result to 16-bit IP. */
1927 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1928 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1929 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1930 break;
1931 case sizeof(uint32_t):
1932 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1933 /** @todo In FLAT mode we can use the flat variant. */
1934 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1935 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1936 break;
1937 case sizeof(uint64_t):
1938 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1939 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1940 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1941 break;
1942 default:
1943 AssertFailed();
1944 }
1945
1946 /* RSP got changed, so do this again. */
1947 off = iemNativeRegFlushPendingWrites(pReNative, off);
1948
1949 /* Store the result. */
1950 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1951#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1952 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1953 pReNative->Core.fDebugPcInitialized = true;
1954 Log4(("uPcUpdatingDebug=rip/indirect-call off=%#x\n", off));
1955#endif
1956
1957#if 1
1958 /* Need to transfer the shadow information to the new RIP register. */
1959 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1960#else
1961 /* Sync the new PC. */
1962 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1963#endif
1964 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1965 iemNativeRegFreeTmp(pReNative, idxPcReg);
1966    /** @todo implicitly free the variable? */
1967
1968 return off;
1969}
1970
1971
1972/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1973 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1974#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1975 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1976
1977/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1978 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1979 * flags. */
1980#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1981 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1982 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1983
1984/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1985 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1986#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1987 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1988
1989/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1990 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1991 * flags. */
1992#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1993 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1994 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1995
1996/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1997 * an extra parameter, for use in 64-bit code. */
1998#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
1999 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
2000
2001/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2002 * an extra parameter, for use in 64-bit code and we need to check and clear
2003 * flags. */
2004#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
2005 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
2006 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2007
2008#undef IEM_MC_REL_CALL_S16_AND_FINISH
2009
2010/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2011 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2012DECL_INLINE_THROW(uint32_t)
2013iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
2014 uint8_t idxInstr)
2015{
2016 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2017 off = iemNativeRegFlushPendingWrites(pReNative, off);
2018
2019#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2020 Assert(pReNative->Core.offPc == 0);
2021 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2022#endif
2023
2024 /* Allocate a temporary PC register. */
2025 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2026 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
2027 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
2028
2029 /* Calculate the new RIP. */
2030 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2031 /* Truncate the result to 16-bit IP. */
2032 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
2033 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2034 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2035
2036 /* Truncate the result to 16-bit IP. */
2037 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
2038
2039 /* Check limit (may #GP(0) + exit TB). */
2040 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2041
2042 /* Perform the addition and push the variable to the guest stack. */
2043 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
2044 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
2045
2046 /* RSP got changed, so flush again. */
2047 off = iemNativeRegFlushPendingWrites(pReNative, off);
2048
2049 /* Store the result. */
2050 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2051#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2052 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2053 pReNative->Core.fDebugPcInitialized = true;
2054 Log4(("uPcUpdatingDebug=rip/rel-call-16 off=%#x offDisp=%d\n", off, offDisp));
2055#endif
2056
2057 /* Need to transfer the shadow information to the new RIP register. */
2058 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2059 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2060 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2061
2062 return off;
2063}
2064
2065
2066/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2067 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2068#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
2069 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
2070
2071/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2072 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2073 * flags. */
2074#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
2075 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
2076 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2077
2078#undef IEM_MC_REL_CALL_S32_AND_FINISH
2079
2080/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2081 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2082DECL_INLINE_THROW(uint32_t)
2083iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
2084 uint8_t idxInstr)
2085{
2086 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2087 off = iemNativeRegFlushPendingWrites(pReNative, off);
2088
2089#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2090 Assert(pReNative->Core.offPc == 0);
2091 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2092#endif
2093
2094 /* Allocate a temporary PC register. */
2095 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2096 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2097 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
2098
2099 /* Update the EIP to get the return address. */
2100 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2101
2102 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2103 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2104 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2105 /** @todo we can skip this test in FLAT 32-bit mode. */
2106 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2107
2108    /* Push the return address onto the guest stack. */
2109 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
2110 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
2111 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
2112
2113 /* RSP got changed, so do this again. */
2114 off = iemNativeRegFlushPendingWrites(pReNative, off);
2115
2116 /* Store the result. */
2117 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2118#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2119 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2120 pReNative->Core.fDebugPcInitialized = true;
2121 Log4(("uPcUpdatingDebug=eip/rel-call-32 off=%#x offDisp=%d\n", off, offDisp));
2122#endif
2123
2124 /* Need to transfer the shadow information to the new RIP register. */
2125 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2126 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2127 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2128
2129 return off;
2130}
2131
2132
2133/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2134 * an extra parameter, for use in 64-bit code. */
2135#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
2136 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
2137
2138/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2139 * an extra parameter, for use in 64-bit code and we need to check and clear
2140 * flags. */
2141#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
2142 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
2143 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2144
2145#undef IEM_MC_REL_CALL_S64_AND_FINISH
2146
2147/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2148 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2149DECL_INLINE_THROW(uint32_t)
2150iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
2151 uint8_t idxInstr)
2152{
2153 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2154 off = iemNativeRegFlushPendingWrites(pReNative, off);
2155
2156#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2157 Assert(pReNative->Core.offPc == 0);
2158 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2159#endif
2160
2161 /* Allocate a temporary PC register. */
2162 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2163 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2164 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
2165
2166 /* Update the RIP to get the return address. */
2167 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
2168
2169 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2170 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
2171 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
2172 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2173
2174    /* Push the return address onto the guest stack. */
2175 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
2176 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
2177
2178 /* RSP got changed, so do this again. */
2179 off = iemNativeRegFlushPendingWrites(pReNative, off);
2180
2181 /* Store the result. */
2182 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2183#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2184 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2185 pReNative->Core.fDebugPcInitialized = true;
2186    Log4(("uPcUpdatingDebug=rip/rel-call-64 off=%#x offDisp=%RI64\n", off, offDisp));
2187#endif
2188
2189 /* Need to transfer the shadow information to the new RIP register. */
2190 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2191 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2192 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2193
2194 return off;
2195}
2196
2197
2198/*********************************************************************************************************************************
2199* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters). *
2200*********************************************************************************************************************************/
2201
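/** Emits code that computes the zero-extended 16-bit effective stack address for
 *  the return address pop in @a idxRegEffSp and advances the stack pointer by
 *  @a cbMem + @a cbPopAdd with 16-bit wrap-around, updating only bits 15:0 of
 *  @a idxRegRsp. @a idxRegTmp is only needed on arm64. */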
2202DECL_FORCE_INLINE_THROW(uint32_t)
2203iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2204 uint16_t cbPopAdd, uint8_t idxRegTmp)
2205{
2206 /* Use16BitSp: */
2207#ifdef RT_ARCH_AMD64
2208 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2209 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
2210 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
2211 RT_NOREF(idxRegTmp);
2212
2213#elif defined(RT_ARCH_ARM64)
2214 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
2215 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
2216    /* add tmp, regrsp, #(cbMem + cbPopAdd) */
2217 uint16_t const cbCombined = cbMem + cbPopAdd;
2218 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
2219 if (cbCombined >= RT_BIT_32(12))
2220 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
2221 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2222 /* and tmp, tmp, #0xffff */
2223 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2224 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
2225    /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
2226 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
2227
2228#else
2229# error "Port me"
2230#endif
2231 return off;
2232}
2233
2234
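/** Emits code that copies the 32-bit stack pointer into @a idxRegEffSp and then
 *  advances @a idxRegRsp by @a cbMem + @a cbPopAdd using 32-bit arithmetic. */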
2235DECL_FORCE_INLINE_THROW(uint32_t)
2236iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2237 uint16_t cbPopAdd)
2238{
2239 /* Use32BitSp: */
2240 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2241 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
2242 return off;
2243}
2244
2245
2246/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
2247#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
2248 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
2249
2250/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
2251#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2252 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
2253
2254/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
2255#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2256 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
2257
2258/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
2259 * clears flags. */
2260#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
2261 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
2262 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2263
2264/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
2265 * clears flags. */
2266#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2267 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
2268 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2269
2270/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
2271 * clears flags. */
2272#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2273 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
2274 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2275
2276/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
2277DECL_INLINE_THROW(uint32_t)
2278iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
2279 IEMMODE enmEffOpSize, uint8_t idxInstr)
2280{
2281 RT_NOREF(cbInstr);
2282
2283#ifdef VBOX_STRICT
2284 /*
2285 * Check that the fExec flags we've got make sense.
2286 */
2287 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
2288#endif
2289
2290 /*
2291 * To keep things simple we have to commit any pending writes first as we
2292 * may end up making calls.
2293 */
2294 off = iemNativeRegFlushPendingWrites(pReNative, off);
2295
2296 /*
2297 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
2298 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
2299 * directly as the effective stack pointer.
2300 * (Code structure is very similar to that of PUSH)
2301 *
2302 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
2303 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
2304 * aren't commonly used (or useful) and thus not in need of optimizing.
2305 *
2306     * Note! For non-flat modes the guest RSP is not allocated for update but rather for calculation,
2307     *       as the shadowed register would otherwise remain modified even if the return address throws
2308     *       a \#GP(0) for being outside the CS limit, leaving a wrong stack pointer value in the guest
2309     *       (see the near return testcase in bs3-cpu-basic-2). If no exception is thrown, the shadowing
2310     *       is transferred to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
2311 */
2312 uint8_t const cbMem = enmEffOpSize == IEMMODE_64BIT
2313 ? sizeof(uint64_t)
2314 : enmEffOpSize == IEMMODE_32BIT
2315 ? sizeof(uint32_t)
2316 : sizeof(uint16_t);
2317 bool const fFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
2318 uintptr_t const pfnFunction = fFlat
2319 ? enmEffOpSize == IEMMODE_64BIT
2320 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
2321 : (uintptr_t)iemNativeHlpStackFlatFetchU32
2322 : enmEffOpSize == IEMMODE_32BIT
2323 ? (uintptr_t)iemNativeHlpStackFetchU32
2324 : (uintptr_t)iemNativeHlpStackFetchU16;
2325 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
2326 fFlat ? kIemNativeGstRegUse_ForUpdate
2327 : kIemNativeGstRegUse_Calculation,
2328 true /*fNoVolatileRegs*/);
2329 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
2330 /** @todo can do a better job picking the register here. For cbMem >= 4 this
2331 * will be the resulting register value. */
2332 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
2333
2334 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
2335 if (fFlat)
2336 Assert(idxRegEffSp == idxRegRsp);
2337 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
2338 {
2339 Assert(idxRegEffSp != idxRegRsp);
2340 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
2341 kIemNativeGstRegUse_ReadOnly);
2342#ifdef RT_ARCH_AMD64
2343 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2344#else
2345 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2346#endif
2347 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
2348 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
2349 offFixupJumpToUseOtherBitSp = off;
2350 if (enmEffOpSize == IEMMODE_32BIT)
2351 {
2352 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
2353 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
2354 }
2355 else
2356 {
2357 Assert(enmEffOpSize == IEMMODE_16BIT);
2358 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
2359 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
2360 idxRegMemResult);
2361 }
2362 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2363 }
2364 /* SpUpdateEnd: */
2365 uint32_t const offLabelSpUpdateEnd = off;
2366
2367 /*
2368     * Okay, now prepare for the TLB lookup and jump to the TlbLookup code (or
2369     * to TlbMiss if we're skipping the lookup).
2370 */
2371 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
2372 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
2373 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
2374 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
2375 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
2376 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
2377 : UINT32_MAX;
2378
2379 if (!TlbState.fSkip)
2380 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
2381 else
2382 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
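    /* Editor's note (control-flow sketch, not emitted code): from SpUpdateEnd we
       jump to TlbLookup (or straight to TlbMiss when the inline lookup is compiled
       out).  A TLB hit falls through into the inline load further down; a miss
       branches to TlbMiss, which calls the fetch helper and then jumps to TlbDone,
       where both paths converge to validate the new RIP and commit RIP/RSP. */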
2383
2384 /*
2385     * UseOtherBitSp:
2386 */
2387 if (!fFlat)
2388 {
2389#ifdef RT_ARCH_AMD64
2390 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2391#else
2392 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2393#endif
2394 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
2395 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
2396 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
2397 idxRegMemResult);
2398 else
2399 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
2400 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
2401 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2402 }
2403
2404 /*
2405 * TlbMiss:
2406 *
2407     * Call helper to do the stack fetch.
2408 */
2409 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
2410
2411#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2412 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2413#else
2414 RT_NOREF(idxInstr);
2415#endif
2416
2417 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
2418 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
2419 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
2420 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
2421
2422
2423 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
2424 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
2425 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
2426
2427 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
2428 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2429
2430 /* Done setting up parameters, make the call. */
2431 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
2432
2433 /* Move the return register content to idxRegMemResult. */
2434 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
2435 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
2436
2437 /* Restore variables and guest shadow registers to volatile registers. */
2438 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
2439 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
2440
2441#ifdef IEMNATIVE_WITH_TLB_LOOKUP
2442 if (!TlbState.fSkip)
2443 {
2444 /* end of TlbMiss - Jump to the done label. */
2445 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
2446 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
2447
2448 /*
2449 * TlbLookup:
2450 */
2451 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
2452 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
2453
2454 /*
2455 * Emit code to load the value (from idxRegMemResult into idxRegMemResult).
2456 */
2457 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2458# ifdef IEM_WITH_TLB_STATISTICS
2459 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
2460 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
2461# endif
2462 switch (cbMem)
2463 {
2464 case 2:
2465 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2466 break;
2467 case 4:
2468 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2469 break;
2470 case 8:
2471 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2472 break;
2473 default:
2474 AssertFailed();
2475 }
2476
2477 TlbState.freeRegsAndReleaseVars(pReNative);
2478
2479 /*
2480 * TlbDone:
2481 *
2482     * Set the new RSP value (FLAT accesses need to calculate it first) and
2483     * commit the popped return address.
2484 */
2485 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
2486 }
2487#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
2488
2489 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
2490 if (!f64Bit)
2491/** @todo we can skip this test in FLAT 32-bit mode. */
2492 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2493 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2494 else if (enmEffOpSize == IEMMODE_64BIT)
2495 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2496
2497 /* Complete RSP calculation for FLAT mode. */
2498 if (idxRegEffSp == idxRegRsp)
2499 {
2500 if (enmEffOpSize == IEMMODE_64BIT)
2501 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
2502 else
2503 {
2504 Assert(enmEffOpSize == IEMMODE_32BIT);
2505 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
2506 }
2507 }
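    /* Editor's note: in FLAT mode the fetch above used the incoming RSP directly,
       so the whole adjustment (operand size + immediate) is applied here in one
       go; e.g. a 64-bit 'retn 8' adds 8 + 8 = 16 to RSP. */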
2508
2509 /* Commit the result and clear any current guest shadows for RIP. */
2510 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
2511 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2512 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
2513#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2514 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2515 pReNative->Core.fDebugPcInitialized = true;
2516 Log4(("uPcUpdatingDebug=rip/ret off=%#x\n", off));
2517#endif
2518
2519 /* Need to transfer the shadowing information to the host register containing the updated value now. */
2520 if (!fFlat)
2521 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
2522
2523 iemNativeRegFreeTmp(pReNative, idxRegRsp);
2524 if (idxRegEffSp != idxRegRsp)
2525 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
2526 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
2527 return off;
2528}
2529
2530
2531/*********************************************************************************************************************************
2532* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
2533*********************************************************************************************************************************/
2534
2535#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
2536 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2537
2538/**
2539 * Emits code to check if a \#NM exception should be raised.
2540 *
2541 * @returns New code buffer offset, UINT32_MAX on failure.
2542 * @param pReNative The native recompile state.
2543 * @param off The code buffer offset.
2544 * @param idxInstr The current instruction.
2545 */
2546DECL_INLINE_THROW(uint32_t)
2547iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2548{
2549#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2550 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
2551
2552 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
2553 {
2554#endif
2555 /*
2556 * Make sure we don't have any outstanding guest register writes as we may
2557          * raise an #NM and all guest registers must be up to date in CPUMCTX.
2558 */
2559 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2560 off = iemNativeRegFlushPendingWrites(pReNative, off);
2561
2562#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2563 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2564#else
2565 RT_NOREF(idxInstr);
2566#endif
2567
2568 /* Allocate a temporary CR0 register. */
2569 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2570 kIemNativeGstRegUse_ReadOnly);
2571
2572 /*
2573          * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
2574 * return raisexcpt();
2575 */
2576 /* Test and jump. */
2577 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS,
2578 kIemNativeLabelType_RaiseNm);
2579
2580 /* Free but don't flush the CR0 register. */
2581 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2582
2583#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2584 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
2585 }
2586 else
2587 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
2588#endif
2589
2590 return off;
2591}
2592
2593
2594#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
2595 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2596
2597/**
2598 * Emits code to check if a \#NM exception should be raised.
2599 *
2600 * @returns New code buffer offset, UINT32_MAX on failure.
2601 * @param pReNative The native recompile state.
2602 * @param off The code buffer offset.
2603 * @param idxInstr The current instruction.
2604 */
2605DECL_INLINE_THROW(uint32_t)
2606iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2607{
2608#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2609 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2610
2611 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2612 {
2613#endif
2614 /*
2615 * Make sure we don't have any outstanding guest register writes as we may
2616          * raise an #NM and all guest registers must be up to date in CPUMCTX.
2617 */
2618 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2619 off = iemNativeRegFlushPendingWrites(pReNative, off);
2620
2621#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2622 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2623#else
2624 RT_NOREF(idxInstr);
2625#endif
2626
2627 /* Allocate a temporary CR0 register. */
2628 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2629 kIemNativeGstRegUse_Calculation);
2630
2631 /*
2632          * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
2633 * return raisexcpt();
2634 */
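        /* Editor's note: this mirrors the architectural rule that WAIT/FWAIT
           raises #NM only when CR0.MP and CR0.TS are both set; the AND emitted
           just below reduces CR0 to those two bits so a single compare-and-exit
           suffices. */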
2635 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2636 /* Test and jump. */
2637 off = iemNativeEmitTestIfGpr32EqualsImmAndTbExit(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS,
2638 kIemNativeLabelType_RaiseNm);
2639
2640 /* Free the CR0 register. */
2641 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2642
2643#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2644 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2645 }
2646 else
2647 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2648#endif
2649
2650 return off;
2651}
2652
2653
2654#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2655 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2656
2657/**
2658 * Emits code to check if a \#MF exception should be raised.
2659 *
2660 * @returns New code buffer offset, UINT32_MAX on failure.
2661 * @param pReNative The native recompile state.
2662 * @param off The code buffer offset.
2663 * @param idxInstr The current instruction.
2664 */
2665DECL_INLINE_THROW(uint32_t)
2666iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2667{
2668 /*
2669 * Make sure we don't have any outstanding guest register writes as we may
2670     * raise an #MF and all guest registers must be up to date in CPUMCTX.
2671 */
2672 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2673 off = iemNativeRegFlushPendingWrites(pReNative, off);
2674
2675#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2676 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2677#else
2678 RT_NOREF(idxInstr);
2679#endif
2680
2681 /* Allocate a temporary FSW register. */
2682 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2683 kIemNativeGstRegUse_ReadOnly);
2684
2685 /*
2686     * if ((FSW & X86_FSW_ES) != 0)
2687 * return raisexcpt();
2688 */
2689 /* Test and jump. */
2690 off = iemNativeEmitTestBitInGprAndTbExitIfSet(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT, kIemNativeLabelType_RaiseMf);
2691
2692 /* Free but don't flush the FSW register. */
2693 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2694
2695 return off;
2696}
2697
2698
2699#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2700 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2701
2702/**
2703 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2704 *
2705 * @returns New code buffer offset, UINT32_MAX on failure.
2706 * @param pReNative The native recompile state.
2707 * @param off The code buffer offset.
2708 * @param idxInstr The current instruction.
2709 */
2710DECL_INLINE_THROW(uint32_t)
2711iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2712{
2713#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2714 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2715
2716 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2717 {
2718#endif
2719 /*
2720 * Make sure we don't have any outstanding guest register writes as we may
2721          * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2722 */
2723 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2724 off = iemNativeRegFlushPendingWrites(pReNative, off);
2725
2726#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2727 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2728#else
2729 RT_NOREF(idxInstr);
2730#endif
2731
2732 /* Allocate a temporary CR0 and CR4 register. */
2733 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2734 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2735 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2736
2737 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2738#ifdef RT_ARCH_AMD64
2739 /*
2740 * We do a modified test here:
2741 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
2742 * else { goto RaiseSseRelated; }
2743 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2744          * all targets except the 386, which doesn't support SSE, so this should
2745          * be a safe assumption.
2746 */
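        /* Editor's worked example: with CR4.OSFXSR=1, CR0.EM=0 and CR0.TS=0 the
           sequence below computes X86_CR4_OSFXSR, and the final XOR with
           X86_CR4_OSFXSR yields zero, so no exit is taken.  Setting EM or TS, or
           clearing OSFXSR, leaves a non-zero result and takes the RaiseSseRelated
           exit. */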
2747 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
2748 //pCodeBuf[off++] = 0xcc;
2749 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2750 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2751 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2752 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2753 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2754 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseSseRelated, kIemNativeInstrCond_ne);
2755
2756#elif defined(RT_ARCH_ARM64)
2757 /*
2758 * We do a modified test here:
2759 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2760 * else { goto RaiseSseRelated; }
2761 */
2762 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
2763 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2764 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2765 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2766 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2767 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2768 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2769 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2770 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2771 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2772 kIemNativeLabelType_RaiseSseRelated);
2773
2774#else
2775# error "Port me!"
2776#endif
2777
2778 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2779 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2780 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2781 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2782
2783#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2784 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2785 }
2786 else
2787 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2788#endif
2789
2790 return off;
2791}
2792
2793
2794#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2795 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2796
2797/**
2798 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2799 *
2800 * @returns New code buffer offset, UINT32_MAX on failure.
2801 * @param pReNative The native recompile state.
2802 * @param off The code buffer offset.
2803 * @param idxInstr The current instruction.
2804 */
2805DECL_INLINE_THROW(uint32_t)
2806iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2807{
2808#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2809 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2810
2811 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2812 {
2813#endif
2814 /*
2815 * Make sure we don't have any outstanding guest register writes as we may
2816          * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2817 */
2818 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2819 off = iemNativeRegFlushPendingWrites(pReNative, off);
2820
2821#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2822 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2823#else
2824 RT_NOREF(idxInstr);
2825#endif
2826
2827 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2828 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2829 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2830 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2831 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2832
2833 /*
2834 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2835 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2836 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2837 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2838 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2839 * { likely }
2840 * else { goto RaiseAvxRelated; }
2841 */
2842#ifdef RT_ARCH_AMD64
2843 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2844 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
2845 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2846 ^ 0x1a) ) { likely }
2847 else { goto RaiseAvxRelated; } */
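        /* Editor's worked example: in the accepted case XCR0.SSE=XCR0.YMM=1,
           CR4.OSXSAVE=1 and CR0.TS=0, so the rotates below build
           (0b110 << 2) | (1 << 1) | 0 = 0x1a, and XORing with 0x1a gives zero.
           Any other combination leaves a non-zero value and exits via
           RaiseAvxRelated. */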
2848 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
2849 //pCodeBuf[off++] = 0xcc;
2850 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2851 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2852 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2853 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2854 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2855 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2856 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2857 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2858 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2859 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2860 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseAvxRelated, kIemNativeInstrCond_ne);
2861
2862#elif defined(RT_ARCH_ARM64)
2863 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
2864 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2865 else { goto RaiseAvxRelated; } */
2866 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
2867 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2868 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2869 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2870 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2871 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2872 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2873 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2874 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2875 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2876 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2877 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2878 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2879 kIemNativeLabelType_RaiseAvxRelated);
2880
2881#else
2882# error "Port me!"
2883#endif
2884
2885 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2886 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2887 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2888 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2889#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2890 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2891 }
2892 else
2893 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2894#endif
2895
2896 return off;
2897}
2898
2899
2900#define IEM_MC_RAISE_DIVIDE_ERROR() \
2901 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
2902
2903/**
2904 * Emits code to raise a \#DE.
2905 *
2906 * @returns New code buffer offset, UINT32_MAX on failure.
2907 * @param pReNative The native recompile state.
2908 * @param off The code buffer offset.
2909 * @param idxInstr The current instruction.
2910 */
2911DECL_INLINE_THROW(uint32_t)
2912iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2913{
2914 /*
2915     * Make sure we don't have any outstanding guest register writes as we may raise a \#DE.
2916 */
2917 off = iemNativeRegFlushPendingWrites(pReNative, off);
2918
2919#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2920 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2921#else
2922 RT_NOREF(idxInstr);
2923#endif
2924
2925 /* raise \#DE exception unconditionally. */
2926 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseDe);
2927}
2928
2929
2930#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2931 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2932
2933/**
2934 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2935 *
2936 * @returns New code buffer offset, UINT32_MAX on failure.
2937 * @param pReNative The native recompile state.
2938 * @param off The code buffer offset.
2939 * @param idxInstr The current instruction.
2940 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2941 * @param cbAlign The alignment in bytes to check against.
2942 */
2943DECL_INLINE_THROW(uint32_t)
2944iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2945 uint8_t idxVarEffAddr, uint8_t cbAlign)
2946{
2947 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2948 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2949
2950 /*
2951 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2952 */
2953 off = iemNativeRegFlushPendingWrites(pReNative, off);
2954
2955#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2956 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2957#else
2958 RT_NOREF(idxInstr);
2959#endif
2960
2961 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2962
2963 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxVarReg, cbAlign - 1,
2964 kIemNativeLabelType_RaiseGp0);
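    /* Editor's note: cbAlign is expected to be a power of two, so (cbAlign - 1)
       is the misalignment mask; e.g. cbAlign=16 tests the low four address bits
       and any set bit takes the RaiseGp0 exit. */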
2965
2966 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2967 return off;
2968}
2969
2970
2971/*********************************************************************************************************************************
2972* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2973*********************************************************************************************************************************/
2974
2975/**
2976 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2977 *
2978 * @returns Pointer to the condition stack entry on success, NULL on failure
2979 * (too many nestings)
2980 */
2981DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
2982{
2983 uint32_t const idxStack = pReNative->cCondDepth;
2984 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
2985
2986 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
2987 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
2988
2989 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
2990 pEntry->fInElse = false;
2991 pEntry->fIfExitTb = false;
2992 pEntry->fElseExitTb = false;
2993 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
2994 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
2995
2996 return pEntry;
2997}
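/* Editor's note (usage sketch): each IEM_MC_IF_XXX emitter below pushes an entry
   here, IEM_MC_ELSE defines idxLabelElse and restores the snapshot taken at the
   start of the if-block, and IEM_MC_ENDIF reconciles the two register states and
   defines idxLabelEndIf. */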
2998
2999
3000/**
3001 * Start of the if-block, snapshotting the register and variable state.
3002 */
3003DECL_INLINE_THROW(void)
3004iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
3005{
3006 Assert(offIfBlock != UINT32_MAX);
3007 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3008 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3009 Assert(!pEntry->fInElse);
3010
3011     /* Define the start of the IF block if requested or for disassembly purposes. */
3012 if (idxLabelIf != UINT32_MAX)
3013 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
3014#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3015 else
3016 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
3017#else
3018 RT_NOREF(offIfBlock);
3019#endif
3020
3021 /* Copy the initial state so we can restore it in the 'else' block. */
3022 pEntry->InitialState = pReNative->Core;
3023}
3024
3025
3026#define IEM_MC_ELSE() } while (0); \
3027 off = iemNativeEmitElse(pReNative, off); \
3028 do {
3029
3030/** Emits code related to IEM_MC_ELSE. */
3031DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3032{
3033 /* Check sanity and get the conditional stack entry. */
3034 Assert(off != UINT32_MAX);
3035 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3036 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3037 Assert(!pEntry->fInElse);
3038
3039     /* We can skip the dirty register flushing and the jump to the endif label if
3040        the branch already jumped to a TB exit. */
3041 if (!pEntry->fIfExitTb)
3042 {
3043#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) && 0
3044 /* Writeback any dirty shadow registers. */
3045 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
3046 * in one of the branches and leave guest registers already dirty before the start of the if
3047 * block alone. */
3048 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3049#endif
3050
3051 /* Jump to the endif. */
3052 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
3053 }
3054# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3055 else
3056 Assert(pReNative->Core.offPc == 0);
3057# endif
3058
3059 /* Define the else label and enter the else part of the condition. */
3060 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3061 pEntry->fInElse = true;
3062
3063 /* Snapshot the core state so we can do a merge at the endif and restore
3064 the snapshot we took at the start of the if-block. */
3065 pEntry->IfFinalState = pReNative->Core;
3066 pReNative->Core = pEntry->InitialState;
3067
3068 return off;
3069}
3070
3071
3072#define IEM_MC_ENDIF() } while (0); \
3073 off = iemNativeEmitEndIf(pReNative, off)
3074
3075/** Emits code related to IEM_MC_ENDIF. */
3076DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3077{
3078 /* Check sanity and get the conditional stack entry. */
3079 Assert(off != UINT32_MAX);
3080 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3081 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3082
3083#if defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) && 0
3084 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3085#endif
3086
3087 /*
3088 * If either of the branches exited the TB, we can take the state from the
3089 * other branch and skip all the merging headache.
3090 */
3091 bool fDefinedLabels = false;
3092 if (pEntry->fElseExitTb || pEntry->fIfExitTb)
3093 {
3094#ifdef VBOX_STRICT
3095 Assert(pReNative->cCondDepth == 1); /* Assuming this only happens in simple conditional structures. */
3096         Assert(pEntry->fElseExitTb != pEntry->fIfExitTb); /* Assuming we don't have any code where both branches exit. */
3097 PCIEMNATIVECORESTATE const pExitCoreState = pEntry->fIfExitTb && pEntry->fInElse
3098 ? &pEntry->IfFinalState : &pReNative->Core;
3099# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3100 Assert(pExitCoreState->bmGstRegShadowDirty == 0);
3101# endif
3102# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3103 Assert(pExitCoreState->offPc == 0);
3104# endif
3105 RT_NOREF(pExitCoreState);
3106#endif
3107
3108 if (!pEntry->fIfExitTb)
3109 {
3110 Assert(pEntry->fInElse);
3111 pReNative->Core = pEntry->IfFinalState;
3112 }
3113 }
3114 else
3115 {
3116 /*
3117          * Now we have to find common ground with the core state at the end of the
3118          * if-branch. Use the lowest common denominator and just drop anything
3119          * that isn't the same in both states.
3120 */
3121 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
3122 * which is why we're doing this at the end of the else-block.
3123          * But we'd need more info about the future for that to be worth the effort. */
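        /* Editor's example (comment only): if the if-branch keeps guest RAX
           shadowed in one host register and the else-branch keeps it in another,
           the shadow is dropped here (and flushed first if it was dirty), so both
           paths converge on the value stored in CPUMCTX. */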
3124 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
3125#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3126 AssertMsgStmt(pReNative->Core.offPc == pOther->offPc,
3127 ("Core.offPc=%#RX64 pOther->offPc=%#RX64\n", pReNative->Core.offPc, pOther->offPc),
3128 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3129#endif
3130
3131 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
3132 {
3133#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3134 /*
3135              * If the branches differ in which shadow registers are dirty, we flush the
3136              * registers that are dirty only in the current branch here and the ones that
3137              * are dirty only in the other branch via the tail code further down.
3138 */
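            /* Editor's example: if RAX is dirty only in this branch and RBX is
               dirty only in the other, fGstRegDirtyHead gets the RAX bit (flushed
               right below) while fGstRegDirtyTail gets the RBX bit (flushed by the
               tail code emitted further down). */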
3139 uint64_t const fGstRegDirtyOther = pOther->bmGstRegShadowDirty;
3140 uint64_t const fGstRegDirtyThis = pReNative->Core.bmGstRegShadowDirty;
3141 uint64_t const fGstRegDirtyDiff = fGstRegDirtyOther ^ fGstRegDirtyThis;
3142 uint64_t const fGstRegDirtyHead = fGstRegDirtyThis & fGstRegDirtyDiff;
3143 uint64_t fGstRegDirtyTail = fGstRegDirtyOther & fGstRegDirtyDiff;
3144 if (!fGstRegDirtyDiff)
3145 { /* likely */ }
3146 else
3147 {
3148 //uint64_t const fGstRegDirtyHead = pReNative->Core.bmGstRegShadowDirty & fGstRegDirtyDiff;
3149 if (fGstRegDirtyHead)
3150 {
3151 Log12(("iemNativeEmitEndIf: flushing dirty guest registers in current branch: %RX64\n", fGstRegDirtyHead));
3152 off = iemNativeRegFlushDirtyGuest(pReNative, off, fGstRegDirtyHead);
3153 }
3154 }
3155#endif
3156
3157 /*
3158 * Shadowed guest registers.
3159 *
3160 * We drop any shadows where the two states disagree about where
3161              * things are kept. We may end up flushing more dirty registers
3162              * here if the two branches keep things in different registers.
3163 */
3164 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
3165 if (fGstRegs)
3166 {
3167 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
3168 do
3169 {
3170 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3171 fGstRegs &= ~RT_BIT_64(idxGstReg);
3172
3173 uint8_t const idxCurHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3174 uint8_t const idxOtherHstReg = pOther->aidxGstRegShadows[idxGstReg];
3175 if ( idxCurHstReg != idxOtherHstReg
3176 || !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg)))
3177 {
3178#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3179 Log12(("iemNativeEmitEndIf: dropping gst %s (%d) from hst %s (other %d/%#RX64)\n",
3180 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3181 idxOtherHstReg, pOther->bmGstRegShadows));
3182#else
3183 Log12(("iemNativeEmitEndIf: dropping %s gst %s (%d) from hst %s (other %d/%#RX64/%s)\n",
3184 pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "_dirty_" : "clean",
3185 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3186 idxOtherHstReg, pOther->bmGstRegShadows,
3187 pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "dirty" : "clean"));
3188 if (pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3189 fGstRegDirtyTail |= RT_BIT_64(idxGstReg);
3190 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3191 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3192#endif
3193 iemNativeRegClearGstRegShadowingOne(pReNative, idxCurHstReg, (IEMNATIVEGSTREG)idxGstReg, off);
3194 }
3195 } while (fGstRegs);
3196 }
3197 else
3198 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
3199
3200#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3201 /*
3202 * Generate jumpy code for flushing dirty registers from the other
3203 * branch that aren't dirty in the current one.
3204 */
3205 if (!fGstRegDirtyTail)
3206 { /* likely */ }
3207 else
3208 {
3209 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEndIfOtherBranchDirty);
3210 Log12(("iemNativeEmitEndIf: Dirty register only in the other branch: %#RX64 - BAD!\n", fGstRegDirtyTail));
3211
3212 /* First the current branch has to jump over the dirty flushing from the other branch. */
3213 uint32_t const offFixup1 = off;
3214 off = iemNativeEmitJmpToFixed(pReNative, off, off + 10);
3215
3216 /* Put the endif and maybe else label here so the other branch ends up here. */
3217 if (!pEntry->fInElse)
3218 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3219 else
3220 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3221 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3222 fDefinedLabels = true;
3223
3224 /* Flush the dirty guest registers from the other branch. */
3225 while (fGstRegDirtyTail)
3226 {
3227 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegDirtyTail) - 1;
3228 fGstRegDirtyTail &= ~RT_BIT_64(idxGstReg);
3229 Log12(("iemNativeEmitEndIf: tail flushing %s (%d) from other branch %d (cur %d/%#RX64)\n",
3230 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, pOther->aidxGstRegShadows[idxGstReg],
3231 pReNative->Core.aidxGstRegShadows[idxGstReg], pReNative->Core.bmGstRegShadows));
3232
3233 off = iemNativeRegFlushPendingWriteEx(pReNative, off, (PIEMNATIVECORESTATE)pOther, (IEMNATIVEGSTREG)idxGstReg);
3234
3235 /* Mismatching shadowing should've been dropped in the previous step already. */
3236 Assert( !(pReNative->Core.bmGstRegShadows & RT_BIT_64(idxGstReg))
3237 || pReNative->Core.aidxGstRegShadows[idxGstReg] == pOther->aidxGstRegShadows[idxGstReg]);
3238 }
3239
3240 /* Here is the actual endif label, fixup the above jump to land here. */
3241 iemNativeFixupFixedJump(pReNative, offFixup1, off);
3242 }
3243#endif
3244
3245 /*
3246 * Check variables next. For now we must require them to be identical
3247 * or stuff we can recreate. (No code is emitted here.)
3248 */
3249 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
3250#ifdef VBOX_STRICT
3251 uint32_t const offAssert = off;
3252#endif
3253 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
3254 if (fVars)
3255 {
3256 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
3257 do
3258 {
3259 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
3260 fVars &= ~RT_BIT_32(idxVar);
3261
3262 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
3263 {
3264 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
3265 continue;
3266 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3267 {
3268 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3269 if (idxHstReg != UINT8_MAX)
3270 {
3271 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3272 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3273 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
3274 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3275 }
3276 continue;
3277 }
3278 }
3279 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
3280 continue;
3281
3282 /* Irreconcilable, so drop it. */
3283 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3284 if (idxHstReg != UINT8_MAX)
3285 {
3286 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3287 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3288 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
3289 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3290 }
3291 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3292 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
3293 } while (fVars);
3294 }
3295 Assert(off == offAssert);
3296
3297 /*
3298          * Finally, check that the host register allocations match.
3299 */
3300 AssertMsgStmt((pReNative->Core.bmHstRegs & (pReNative->Core.bmHstRegs ^ pOther->bmHstRegs)) == 0,
3301 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
3302 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
3303 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3304 }
3305 }
3306
3307 /*
3308 * Define the endif label and maybe the else one if we're still in the 'if' part.
3309 */
3310 if (!fDefinedLabels)
3311 {
3312 if (!pEntry->fInElse)
3313 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3314 else
3315 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3316 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3317 }
3318
3319     /* Pop the conditional stack. */
3320 pReNative->cCondDepth -= 1;
3321
3322 return off;
3323}
3324
3325
3326#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
3327 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
3328 do {
3329
3330/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
3331DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
3332{
3333 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3334 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3335
3336 /* Get the eflags. */
3337 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3338 kIemNativeGstRegUse_ReadOnly);
3339
3340 /* Test and jump. */
3341 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3342
3343 /* Free but don't flush the EFlags register. */
3344 iemNativeRegFreeTmp(pReNative, idxEflReg);
3345
3346 /* Make a copy of the core state now as we start the if-block. */
3347 iemNativeCondStartIfBlock(pReNative, off);
3348
3349 return off;
3350}
3351
3352
3353#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
3354 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
3355 do {
3356
3357/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
3358DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
3359{
3360 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3361 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3362
3363 /* Get the eflags. */
3364 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3365 kIemNativeGstRegUse_ReadOnly);
3366
3367 /* Test and jump. */
3368 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3369
3370 /* Free but don't flush the EFlags register. */
3371 iemNativeRegFreeTmp(pReNative, idxEflReg);
3372
3373 /* Make a copy of the core state now as we start the if-block. */
3374 iemNativeCondStartIfBlock(pReNative, off);
3375
3376 return off;
3377}
3378
3379
3380#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
3381 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
3382 do {
3383
3384/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
3385DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
3386{
3387 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3388 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3389
3390 /* Get the eflags. */
3391 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3392 kIemNativeGstRegUse_ReadOnly);
3393
3394 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3395 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3396
3397 /* Test and jump. */
3398 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3399
3400 /* Free but don't flush the EFlags register. */
3401 iemNativeRegFreeTmp(pReNative, idxEflReg);
3402
3403 /* Make a copy of the core state now as we start the if-block. */
3404 iemNativeCondStartIfBlock(pReNative, off);
3405
3406 return off;
3407}
3408
3409
3410#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
3411 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
3412 do {
3413
3414/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
3415DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
3416{
3417 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3418 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3419
3420 /* Get the eflags. */
3421 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3422 kIemNativeGstRegUse_ReadOnly);
3423
3424 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3425 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3426
3427 /* Test and jump. */
3428 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3429
3430 /* Free but don't flush the EFlags register. */
3431 iemNativeRegFreeTmp(pReNative, idxEflReg);
3432
3433 /* Make a copy of the core state now as we start the if-block. */
3434 iemNativeCondStartIfBlock(pReNative, off);
3435
3436 return off;
3437}
3438
3439
3440#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
3441 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
3442 do {
3443
3444#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
3445 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
3446 do {
3447
3448/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
3449DECL_INLINE_THROW(uint32_t)
3450iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3451 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
3452{
3453 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
3454 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3455
3456 /* Get the eflags. */
3457 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3458 kIemNativeGstRegUse_ReadOnly);
3459
3460 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
3461 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
3462
3463 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
3464 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
3465 Assert(iBitNo1 != iBitNo2);
3466
3467#ifdef RT_ARCH_AMD64
3468 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
3469
3470 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3471 if (iBitNo1 > iBitNo2)
3472 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3473 else
3474 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3475 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3476
3477#elif defined(RT_ARCH_ARM64)
3478 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3479 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3480
3481 /* and tmpreg, eflreg, #1<<iBitNo1 */
3482 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3483
3484 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3485 if (iBitNo1 > iBitNo2)
3486 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3487 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3488 else
3489 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3490 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3491
3492 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3493
3494#else
3495# error "Port me"
3496#endif
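    /* Editor's worked example: for IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF),
       as used for signed less-than style conditions, the code above isolates one
       flag, shifts it onto the other flag's bit position and XORs with EFLAGS, so
       that bit ends up set exactly when SF != OF, which the test just below then
       branches on. */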
3497
3498 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3499 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3500 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3501
3502 /* Free but don't flush the EFlags and tmp registers. */
3503 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3504 iemNativeRegFreeTmp(pReNative, idxEflReg);
3505
3506 /* Make a copy of the core state now as we start the if-block. */
3507 iemNativeCondStartIfBlock(pReNative, off);
3508
3509 return off;
3510}
3511
3512
3513#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
3514 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
3515 do {
3516
3517#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
3518 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
3519 do {
3520
3521/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
3522 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
3523DECL_INLINE_THROW(uint32_t)
3524iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
3525 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
3526{
3527 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
3528 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3529
3530     /* We need an if-block label for the inverted variant, as its lone bit test jumps straight to the if-block. */
3531 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
3532 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
3533
3534 /* Get the eflags. */
3535 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3536 kIemNativeGstRegUse_ReadOnly);
3537
3538 /* Translate the flag masks to bit numbers. */
3539 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3540 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3541
3542 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
3543 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
3544 Assert(iBitNo1 != iBitNo);
3545
3546 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
3547 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
3548 Assert(iBitNo2 != iBitNo);
3549 Assert(iBitNo2 != iBitNo1);
3550
3551#ifdef RT_ARCH_AMD64
3552 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
3553#elif defined(RT_ARCH_ARM64)
3554 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3555#endif
3556
3557 /* Check for the lone bit first. */
3558 if (!fInverted)
3559 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3560 else
3561 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
3562
3563 /* Then extract and compare the other two bits. */
3564#ifdef RT_ARCH_AMD64
3565 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3566 if (iBitNo1 > iBitNo2)
3567 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3568 else
3569 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3570 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3571
3572#elif defined(RT_ARCH_ARM64)
3573 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3574
3575 /* and tmpreg, eflreg, #1<<iBitNo1 */
3576 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3577
3578 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3579 if (iBitNo1 > iBitNo2)
3580 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3581 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3582 else
3583 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3584 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3585
3586 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3587
3588#else
3589# error "Port me"
3590#endif
3591
3592 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3593 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3594 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3595
3596 /* Free but don't flush the EFlags and tmp registers. */
3597 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3598 iemNativeRegFreeTmp(pReNative, idxEflReg);
3599
3600 /* Make a copy of the core state now as we start the if-block. */
3601 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
3602
3603 return off;
3604}
3605
3606
3607#define IEM_MC_IF_CX_IS_NZ() \
3608 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
3609 do {
3610
3611/** Emits code for IEM_MC_IF_CX_IS_NZ. */
3612DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3613{
3614 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3615
3616 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3617 kIemNativeGstRegUse_ReadOnly);
3618 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
3619 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3620
3621 iemNativeCondStartIfBlock(pReNative, off);
3622 return off;
3623}
3624
3625
3626#define IEM_MC_IF_ECX_IS_NZ() \
3627 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
3628 do {
3629
3630#define IEM_MC_IF_RCX_IS_NZ() \
3631 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
3632 do {
3633
3634/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
3635DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3636{
3637 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3638
3639 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3640 kIemNativeGstRegUse_ReadOnly);
3641 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
3642 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3643
3644 iemNativeCondStartIfBlock(pReNative, off);
3645 return off;
3646}
3647
3648
3649#define IEM_MC_IF_CX_IS_NOT_ONE() \
3650 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
3651 do {
3652
3653/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
3654DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3655{
3656 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3657
3658 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3659 kIemNativeGstRegUse_ReadOnly);
3660#ifdef RT_ARCH_AMD64
3661 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3662#else
3663 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3664 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3665 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3666#endif
3667 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3668
3669 iemNativeCondStartIfBlock(pReNative, off);
3670 return off;
3671}
3672
3673
3674#define IEM_MC_IF_ECX_IS_NOT_ONE() \
3675 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
3676 do {
3677
3678#define IEM_MC_IF_RCX_IS_NOT_ONE() \
3679 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
3680 do {
3681
3682/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
3683DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3684{
3685 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3686
3687 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3688 kIemNativeGstRegUse_ReadOnly);
3689 if (f64Bit)
3690 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3691 else
3692 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3693 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3694
3695 iemNativeCondStartIfBlock(pReNative, off);
3696 return off;
3697}
3698
3699
3700#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3701 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
3702 do {
3703
3704#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3705 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
3706 do {
3707
3708/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3709 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3710DECL_INLINE_THROW(uint32_t)
3711iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
3712{
3713 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3714 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3715
3716 /* We have to load both RCX and EFLAGS before we can start branching,
3717 otherwise we'll end up in the else-block with an inconsistent
3718 register allocator state.
3719 Doing EFLAGS first as it's more likely to be loaded, right? */
3720 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3721 kIemNativeGstRegUse_ReadOnly);
3722 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3723 kIemNativeGstRegUse_ReadOnly);
3724
3725 /** @todo we could reduce this to a single branch instruction by spending a
3726 * temporary register and some setnz stuff. Not sure if loops are
3727 * worth it. */
3728 /* Check CX. */
3729#ifdef RT_ARCH_AMD64
3730 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3731#else
3732 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3733 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3734 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3735#endif
3736
3737 /* Check the EFlags bit. */
3738 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3739 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3740 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3741 !fCheckIfSet /*fJmpIfSet*/);
3742
3743 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3744 iemNativeRegFreeTmp(pReNative, idxEflReg);
3745
3746 iemNativeCondStartIfBlock(pReNative, off);
3747 return off;
3748}
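/* The two early exits above implement the short-circuited condition below; the else-label is
   taken as soon as either test fails. The ECX/RCX variants further down differ only in the
   width of the counter comparison. Sketch only, hypothetical name, not part of the build: */
#if 0
# include <stdint.h>
# include <stdbool.h>
static bool iemDemoCxNotOneAndEflBit(uint64_t uRcx, uint32_t fEFlags, uint32_t fBitInEfl, bool fCheckIfSet)
{
    if ((uint16_t)uRcx == 1)                            /* CX check: branch to else when CX == 1 */
        return false;
    bool const fBitSet = (fEFlags & fBitInEfl) != 0;
    return fBitSet == fCheckIfSet;                      /* EFLAGS bit check with requested polarity */
}
#endif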
3749
3750
3751#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3752 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
3753 do {
3754
3755#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3756 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
3757 do {
3758
3759#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3760 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
3761 do {
3762
3763#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3764 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
3765 do {
3766
3767/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3768 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3769 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3770 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3771DECL_INLINE_THROW(uint32_t)
3772iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3773 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
3774{
3775 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3776 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3777
3778 /* We have to load both RCX and EFLAGS before we can start branching,
3779 otherwise we'll end up in the else-block with an inconsistent
3780 register allocator state.
3781 Doing EFLAGS first as it's more likely to be loaded, right? */
3782 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3783 kIemNativeGstRegUse_ReadOnly);
3784 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3785 kIemNativeGstRegUse_ReadOnly);
3786
3787 /** @todo we could reduce this to a single branch instruction by spending a
3788 * temporary register and some setnz stuff. Not sure if loops are
3789 * worth it. */
3790 /* Check RCX/ECX. */
3791 if (f64Bit)
3792 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3793 else
3794 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3795
3796 /* Check the EFlags bit. */
3797 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3798 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3799 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3800 !fCheckIfSet /*fJmpIfSet*/);
3801
3802 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3803 iemNativeRegFreeTmp(pReNative, idxEflReg);
3804
3805 iemNativeCondStartIfBlock(pReNative, off);
3806 return off;
3807}
3808
3809
3810#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3811 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3812 do {
3813
3814/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3815DECL_INLINE_THROW(uint32_t)
3816iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3817{
3818 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3819
3820 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3821 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3822 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3823 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3824
3825 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3826
3827 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3828
3829 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3830
3831 iemNativeCondStartIfBlock(pReNative, off);
3832 return off;
3833}
3834
3835
3836#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3837 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3838 do {
3839
3840/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3841DECL_INLINE_THROW(uint32_t)
3842iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3843{
3844 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3845 Assert(iGReg < 16);
3846
3847 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3848 kIemNativeGstRegUse_ReadOnly);
3849
3850 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3851
3852 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3853
3854 iemNativeCondStartIfBlock(pReNative, off);
3855 return off;
3856}
3857
3858
3859
3860/*********************************************************************************************************************************
3861* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3862*********************************************************************************************************************************/
3863
3864#define IEM_MC_NOREF(a_Name) \
3865 RT_NOREF_PV(a_Name)
3866
3867#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3868 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3869
3870#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3871 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3872
3873#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3874 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3875
3876#define IEM_MC_LOCAL(a_Type, a_Name) \
3877 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3878
3879#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3880 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3881
3882#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3883 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3884
3885
3886/**
3887 * Sets the host register for @a idxVar to @a idxReg.
3888 *
3889 * The register must not be allocated. Any guest register shadowing will be
3890 * implicitly dropped by this call.
3891 *
3892 * The variable must not have any register associated with it (causes
3893 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3894 * implied.
3895 *
3896 * @returns idxReg
3897 * @param pReNative The recompiler state.
3898 * @param idxVar The variable.
3899 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3900 * @param off For recording in debug info.
3901 *
3902 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3903 */
3904DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
3905{
3906 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3907 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3908 Assert(!pVar->fRegAcquired);
3909 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3910 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3911 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3912
3913 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3914 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3915
3916 iemNativeVarSetKindToStack(pReNative, idxVar);
3917 pVar->idxReg = idxReg;
3918
3919 return idxReg;
3920}
3921
3922
3923/**
3924 * A convenient helper function.
3925 */
3926DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3927 uint8_t idxReg, uint32_t *poff)
3928{
3929 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
3930 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3931 return idxReg;
3932}
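/* Typical use (see the IEM_MC_CALL_AIMPL_XXX emitters below): after emitting a helper call,
   the return-value variable is bound directly to the register the ABI returns in, e.g.
       iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
   which avoids an extra register-to-register copy. */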
3933
3934
3935/**
3936 * This is called by IEM_MC_END() to clean up all variables.
3937 */
3938DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3939{
3940 uint32_t const bmVars = pReNative->Core.bmVars;
3941 if (bmVars != 0)
3942 iemNativeVarFreeAllSlow(pReNative, bmVars);
3943 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3944 Assert(pReNative->Core.bmStack == 0);
3945}
3946
3947
3948#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
3949
3950/**
3951 * This is called by IEM_MC_FREE_LOCAL.
3952 */
3953DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3954{
3955 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3956 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
3957 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3958}
3959
3960
3961#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
3962
3963/**
3964 * This is called by IEM_MC_FREE_ARG.
3965 */
3966DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3967{
3968 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3969 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
3970 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3971}
3972
3973
3974#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
3975
3976/**
3977 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
3978 */
3979DECL_INLINE_THROW(uint32_t)
3980iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
3981{
3982 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
3983 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
3984 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3985 Assert( pVarDst->cbVar == sizeof(uint16_t)
3986 || pVarDst->cbVar == sizeof(uint32_t));
3987
3988 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
3989 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
3990 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
3991 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
3992 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3993
3994 Assert(pVarDst->cbVar < pVarSrc->cbVar);
3995
3996 /*
3997 * Special case for immediates.
3998 */
3999 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
4000 {
4001 switch (pVarDst->cbVar)
4002 {
4003 case sizeof(uint16_t):
4004 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
4005 break;
4006 case sizeof(uint32_t):
4007 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
4008 break;
4009 default: AssertFailed(); break;
4010 }
4011 }
4012 else
4013 {
4014 /*
4015 * The generic solution for now.
4016 */
4017 /** @todo optimize this by having the python script make sure the source
4018 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
4019 * statement. Then we could just transfer the register assignments. */
4020 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
4021 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
4022 switch (pVarDst->cbVar)
4023 {
4024 case sizeof(uint16_t):
4025 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
4026 break;
4027 case sizeof(uint32_t):
4028 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
4029 break;
4030 default: AssertFailed(); break;
4031 }
4032 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
4033 iemNativeVarRegisterRelease(pReNative, idxVarDst);
4034 }
4035 return off;
4036}
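/* Semantically IEM_MC_ASSIGN_TO_SMALLER is just a truncating assignment; both the immediate
   special case and the 16-/32-bit register load above compute the equivalent of this sketch
   (hypothetical name, not part of the build): */
#if 0
# include <stdint.h>
static uint32_t iemDemoAssignToSmaller(uint64_t uSrc, unsigned cbDst)
{
    /* Only the low cbDst bytes of the destination variable are meaningful. */
    return cbDst == sizeof(uint16_t) ? (uint16_t)uSrc : (uint32_t)uSrc;
}
#endif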
4037
4038
4039
4040/*********************************************************************************************************************************
4041* Emitters for IEM_MC_CALL_CIMPL_XXX *
4042*********************************************************************************************************************************/
4043
4044/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
4045DECL_INLINE_THROW(uint32_t)
4046iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
4047 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
4048
4049{
4050 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
4051
4052#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4053 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
4054 when a call clobbers any of the relevant control registers. */
4055# if 1
4056 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
4057 {
4058 /* Likely as long as call+ret are done via cimpl. */
4059 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
4060 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
4061 }
4062 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
4063 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4064 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
4065 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4066 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
4067 else
4068 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4069 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4070 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4071
4072# else
4073 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
4074 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4075 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
4076 pReNative->fSimdRaiseXcptChecksEmitted = 0;
4077 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
4078 || pfnCImpl == (uintptr_t)iemCImpl_callf
4079 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
4080 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
4081 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4082 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4083 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4084# endif
4085
4086# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
4087 /* Mark the host floating point control register as not synced if MXCSR is modified. */
4088 if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_MxCsr))
4089 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
4090# endif
4091#endif
4092
4093 /*
4094 * Do all the call setup and cleanup.
4095 */
4096 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
4097
4098 /*
4099 * Load the two or three hidden arguments.
4100 */
4101#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4102 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4103 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4104 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
4105#else
4106 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4107 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
4108#endif
4109
4110 /*
4111 * Make the call and check the return code.
4112 *
4113 * Shadow PC copies are always flushed here, other stuff depends on flags.
4114 * Segment and general purpose registers are explicitly flushed via the
4115 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
4116 * macros.
4117 */
4118 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
4119#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4120 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4121#endif
4122 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
4123 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
4124 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4125 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4126
4127#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
4128 pReNative->Core.fDebugPcInitialized = false;
4129 Log4(("fDebugPcInitialized=false cimpl off=%#x (v1)\n", off));
4130#endif
4131
4132 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4133}
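/* At the C level the sequence emitted above amounts to the following (sketch only; hypothetical
   typedef/names, ignoring the Windows VBOXSTRICTRC shadow-argument variant handled by the #if
   above, not part of the build): */
#if 0
# include <stdint.h>
typedef int FNIEMDEMOCIMPL2(void *pVCpu, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1);
static int iemDemoCallCImpl2(FNIEMDEMOCIMPL2 *pfnCImpl, void *pVCpu, uint8_t cbInstr,
                             uint64_t uArg0, uint64_t uArg1)
{
    /* pVCpu and cbInstr are the hidden arguments loaded above; uArg0/uArg1 come from the
       IEM_MC_ARG variables marshalled by iemNativeEmitCallCommon(). */
    int const rcStrict = pfnCImpl(pVCpu, cbInstr, uArg0, uArg1);
    /* ...the generated code then checks this result via iemNativeEmitCheckCallRetAndPassUp(). */
    return rcStrict;
}
#endif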
4134
4135
4136#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4137 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
4138
4139/** Emits code for IEM_MC_CALL_CIMPL_1. */
4140DECL_INLINE_THROW(uint32_t)
4141iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4142 uintptr_t pfnCImpl, uint8_t idxArg0)
4143{
4144 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4145 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
4146}
4147
4148
4149#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4150 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
4151
4152/** Emits code for IEM_MC_CALL_CIMPL_2. */
4153DECL_INLINE_THROW(uint32_t)
4154iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4155 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
4156{
4157 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4158 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4159 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
4160}
4161
4162
4163#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4164 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4165 (uintptr_t)a_pfnCImpl, a0, a1, a2)
4166
4167/** Emits code for IEM_MC_CALL_CIMPL_3. */
4168DECL_INLINE_THROW(uint32_t)
4169iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4170 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4171{
4172 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4173 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4174 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4175 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
4176}
4177
4178
4179#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
4180 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4181 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
4182
4183/** Emits code for IEM_MC_CALL_CIMPL_4. */
4184DECL_INLINE_THROW(uint32_t)
4185iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4186 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4187{
4188 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4189 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4190 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4191 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4192 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
4193}
4194
4195
4196#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
4197 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4198 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
4199
4200/** Emits code for IEM_MC_CALL_CIMPL_5. */
4201DECL_INLINE_THROW(uint32_t)
4202iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4203 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
4204{
4205 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4206 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4207 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4208 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4209 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
4210 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
4211}
4212
4213
4214/** Recompiler debugging: Flush guest register shadow copies. */
4215#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
4216
4217
4218
4219/*********************************************************************************************************************************
4220* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
4221*********************************************************************************************************************************/
4222
4223/**
4224 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
4225 */
4226DECL_INLINE_THROW(uint32_t)
4227iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4228 uintptr_t pfnAImpl, uint8_t cArgs)
4229{
4230 if (idxVarRc != UINT8_MAX)
4231 {
4232 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
4233 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
4234 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
4235 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
4236 }
4237
4238 /*
4239 * Do all the call setup and cleanup.
4240 *
4241 * Only pending guest register writes held in call-volatile registers need to be flushed here,
4242 * since assembly helpers can't throw and don't access anything living in CPUMCTX; they only
4243 * access their parameters. Call-volatile registers are always flushed by iemNativeEmitCallCommon()
4244 * regardless of the fFlushPendingWrites parameter.
4245 */
4246 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
4247
4248 /*
4249 * Make the call and update the return code variable if we've got one.
4250 */
4251 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
4252 if (idxVarRc != UINT8_MAX)
4253 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
4254
4255 return off;
4256}
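/* Equivalent C-level view of the above (sketch only, hypothetical typedef/names, not part of the
   build): the assembly helper only sees its marshalled arguments, and an optional return value is
   left in the ABI return register, which iemNativeVarRegisterSet() then binds to the rc variable. */
#if 0
# include <stdint.h>
typedef uint64_t FNIEMDEMOAIMPL2(uint64_t uArg0, uint64_t uArg1);
static uint64_t iemDemoCallAImpl2(FNIEMDEMOAIMPL2 *pfnAImpl, uint64_t uArg0, uint64_t uArg1)
{
    return pfnAImpl(uArg0, uArg1); /* return value ends up in IEMNATIVE_CALL_RET_GREG */
}
#endif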
4257
4258
4259
4260#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
4261 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
4262
4263#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
4264 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
4265
4266/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
4267DECL_INLINE_THROW(uint32_t)
4268iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
4269{
4270 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
4271}
4272
4273
4274#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
4275 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
4276
4277#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
4278 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
4279
4280/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
4281DECL_INLINE_THROW(uint32_t)
4282iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
4283{
4284 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4285 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
4286}
4287
4288
4289#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
4290 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
4291
4292#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
4293 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
4294
4295/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
4296DECL_INLINE_THROW(uint32_t)
4297iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4298 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
4299{
4300 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4301 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4302 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
4303}
4304
4305
4306#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
4307 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
4308
4309#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
4310 IEM_MC_LOCAL(a_rcType, a_rc); \
4311 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
4312
4313/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
4314DECL_INLINE_THROW(uint32_t)
4315iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4316 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4317{
4318 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4319 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4320 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4321 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
4322}
4323
4324
4325#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
4326 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4327
4328#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
4329 IEM_MC_LOCAL(a_rcType, a_rc); \
4330 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4331
4332/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
4333DECL_INLINE_THROW(uint32_t)
4334iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4335 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4336{
4337 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4338 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4339 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4340 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
4341 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
4342}
4343
4344
4345
4346/*********************************************************************************************************************************
4347* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
4348*********************************************************************************************************************************/
4349
4350#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
4351 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
4352
4353#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4354 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
4355
4356#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4357 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
4358
4359#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4360 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
4361
4362
4363/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
4364 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
4365DECL_INLINE_THROW(uint32_t)
4366iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
4367{
4368 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4369 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4370 Assert(iGRegEx < 20);
4371
4372 /* Same discussion as in iemNativeEmitFetchGregU16 */
4373 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4374 kIemNativeGstRegUse_ReadOnly);
4375
4376 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4377 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4378
4379 /* The value is zero-extended to the full 64-bit host register width. */
4380 if (iGRegEx < 16)
4381 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4382 else
4383 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4384
4385 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4386 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4387 return off;
4388}
4389
4390
4391#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4392 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
4393
4394#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4395 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
4396
4397#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4398 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
4399
4400/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
4401DECL_INLINE_THROW(uint32_t)
4402iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
4403{
4404 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4405 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4406 Assert(iGRegEx < 20);
4407
4408 /* Same discussion as in iemNativeEmitFetchGregU16 */
4409 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4410 kIemNativeGstRegUse_ReadOnly);
4411
4412 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4413 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4414
4415 if (iGRegEx < 16)
4416 {
4417 switch (cbSignExtended)
4418 {
4419 case sizeof(uint16_t):
4420 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4421 break;
4422 case sizeof(uint32_t):
4423 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4424 break;
4425 case sizeof(uint64_t):
4426 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4427 break;
4428 default: AssertFailed(); break;
4429 }
4430 }
4431 else
4432 {
4433 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4434 switch (cbSignExtended)
4435 {
4436 case sizeof(uint16_t):
4437 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4438 break;
4439 case sizeof(uint32_t):
4440 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4441 break;
4442 case sizeof(uint64_t):
4443 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4444 break;
4445 default: AssertFailed(); break;
4446 }
4447 }
4448
4449 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4450 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4451 return off;
4452}
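/* Value-level sketch of what the fetched variable ends up holding (hypothetical name, not part
   of the build); iGRegEx >= 16 selects the legacy high-byte registers AH/CH/DH/BH, i.e. bits
   15:8 of the full GPR: */
#if 0
# include <stdint.h>
static int64_t iemDemoFetchGreg8Sx(uint64_t uGstReg, uint8_t iGRegEx, unsigned cbSignExtended)
{
    int8_t const i8Value = (int8_t)(iGRegEx < 16 ? uGstReg : uGstReg >> 8);
    /* Only the low cbSignExtended bytes of the result are meaningful to the destination variable. */
    return cbSignExtended == sizeof(uint16_t) ? (int16_t)i8Value
         : cbSignExtended == sizeof(uint32_t) ? (int32_t)i8Value
         :                                      (int64_t)i8Value;
}
#endif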
4453
4454
4455
4456#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
4457 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
4458
4459#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
4460 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4461
4462#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
4463 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4464
4465/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
4466DECL_INLINE_THROW(uint32_t)
4467iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4468{
4469 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4470 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4471 Assert(iGReg < 16);
4472
4473 /*
4474 * We can either just load the low 16-bit of the GPR into a host register
4475 * for the variable, or we can do so via a shadow copy host register. The
4476 * latter will avoid having to reload it if it's being stored later, but
4477 * will waste a host register if it isn't touched again. Since we don't
4478 * know what going to happen, we choose the latter for now.
4479 */
4480 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4481 kIemNativeGstRegUse_ReadOnly);
4482
4483 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4484 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4485 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4486 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4487
4488 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4489 return off;
4490}
4491
4492#define IEM_MC_FETCH_GREG_I16(a_i16Dst, a_iGReg) \
4493 off = iemNativeEmitFetchGregI16(pReNative, off, a_i16Dst, a_iGReg)
4494
4495/** Emits code for IEM_MC_FETCH_GREG_I16. */
4496DECL_INLINE_THROW(uint32_t)
4497iemNativeEmitFetchGregI16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4498{
4499 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4500 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(int16_t));
4501 Assert(iGReg < 16);
4502
4503 /*
4504 * We can either just load the low 16-bit of the GPR into a host register
4505 * for the variable, or we can do so via a shadow copy host register. The
4506 * latter will avoid having to reload it if it's being stored later, but
4507 * will waste a host register if it isn't touched again. Since we don't
4508 * know what going to happen, we choose the latter for now.
4509 */
4510 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4511 kIemNativeGstRegUse_ReadOnly);
4512
4513 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4514 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4515#ifdef RT_ARCH_AMD64
4516 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4517#elif defined(RT_ARCH_ARM64) /* Note! There are no 16-bit registers on ARM64; we emulate them via 32-bit registers, which requires sign extension. */
4518 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4519#endif
4520 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4521
4522 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4523 return off;
4524}
4525
4526
4527#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
4528 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4529
4530#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
4531 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4532
4533/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
4534DECL_INLINE_THROW(uint32_t)
4535iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
4536{
4537 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4538 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4539 Assert(iGReg < 16);
4540
4541 /*
4542 * We can either just load the low 16-bit of the GPR into a host register
4543 * for the variable, or we can do so via a shadow copy host register. The
4544 * latter will avoid having to reload it if it's being stored later, but
4545 * will waste a host register if it isn't touched again. Since we don't
4546 * know what going to happen, we choose the latter for now.
4547 */
4548 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4549 kIemNativeGstRegUse_ReadOnly);
4550
4551 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4552 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4553 if (cbSignExtended == sizeof(uint32_t))
4554 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4555 else
4556 {
4557 Assert(cbSignExtended == sizeof(uint64_t));
4558 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4559 }
4560 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4561
4562 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4563 return off;
4564}
4565
4566
4567#define IEM_MC_FETCH_GREG_I32(a_i32Dst, a_iGReg) \
4568 off = iemNativeEmitFetchGregU32(pReNative, off, a_i32Dst, a_iGReg, sizeof(uint32_t))
4569
4570#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
4571 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
4572
4573#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
4574 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
4575
4576/** Emits code for IEM_MC_FETCH_GREG_I32, IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
4577DECL_INLINE_THROW(uint32_t)
4578iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4579{
4580 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4581 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4582 Assert(iGReg < 16);
4583
4584 /*
4585 * We can either just load the low 32-bit of the GPR into a host register
4586 * for the variable, or we can do so via a shadow copy host register. The
4587 * latter will avoid having to reload it if it's being stored later, but
4588 * will waste a host register if it isn't touched again. Since we don't
4589 * know what's going to happen, we choose the latter for now.
4590 */
4591 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4592 kIemNativeGstRegUse_ReadOnly);
4593
4594 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4595 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4596 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4597 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4598
4599 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4600 return off;
4601}
4602
4603
4604#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
4605 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
4606
4607/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
4608DECL_INLINE_THROW(uint32_t)
4609iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4610{
4611 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4612 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4613 Assert(iGReg < 16);
4614
4615 /*
4616 * We can either just load the low 32-bit of the GPR into a host register
4617 * for the variable, or we can do so via a shadow copy host register. The
4618 * latter will avoid having to reload it if it's being stored later, but
4619 * will waste a host register if it isn't touched again. Since we don't
4620 * know what's going to happen, we choose the latter for now.
4621 */
4622 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4623 kIemNativeGstRegUse_ReadOnly);
4624
4625 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4626 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4627 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4628 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4629
4630 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4631 return off;
4632}
4633
4634
4635#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
4636 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4637
4638#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
4639 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4640
4641/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
4642 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
4643DECL_INLINE_THROW(uint32_t)
4644iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4645{
4646 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4647 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4648 Assert(iGReg < 16);
4649
4650 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4651 kIemNativeGstRegUse_ReadOnly);
4652
4653 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4654 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4655 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
4656 /** @todo name the register a shadow one already? */
4657 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4658
4659 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4660 return off;
4661}
4662
4663
4664#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4665#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
4666 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
4667
4668/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
4669DECL_INLINE_THROW(uint32_t)
4670iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
4671{
4672 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4673 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4674 Assert(iGRegLo < 16 && iGRegHi < 16);
4675
4676 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4677 kIemNativeGstRegUse_ReadOnly);
4678 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4679 kIemNativeGstRegUse_ReadOnly);
4680
4681 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4682 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
4683 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
4684 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
4685
4686 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4687 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4688 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4689 return off;
4690}
4691#endif
4692
4693
4694/*********************************************************************************************************************************
4695* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
4696*********************************************************************************************************************************/
4697
4698#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
4699 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
4700
4701/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
4702DECL_INLINE_THROW(uint32_t)
4703iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
4704{
4705 Assert(iGRegEx < 20);
4706 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4707 kIemNativeGstRegUse_ForUpdate);
4708#ifdef RT_ARCH_AMD64
4709 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4710
4711 /* To the lowest byte of the register: mov r8, imm8 */
4712 if (iGRegEx < 16)
4713 {
4714 if (idxGstTmpReg >= 8)
4715 pbCodeBuf[off++] = X86_OP_REX_B;
4716 else if (idxGstTmpReg >= 4)
4717 pbCodeBuf[off++] = X86_OP_REX;
4718 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4719 pbCodeBuf[off++] = u8Value;
4720 }
4721 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
4722 else if (idxGstTmpReg < 4)
4723 {
4724 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4725 pbCodeBuf[off++] = u8Value;
4726 }
4727 else
4728 {
4729 /* ror reg64, 8 */
4730 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4731 pbCodeBuf[off++] = 0xc1;
4732 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4733 pbCodeBuf[off++] = 8;
4734
4735 /* mov reg8, imm8 */
4736 if (idxGstTmpReg >= 8)
4737 pbCodeBuf[off++] = X86_OP_REX_B;
4738 else if (idxGstTmpReg >= 4)
4739 pbCodeBuf[off++] = X86_OP_REX;
4740 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4741 pbCodeBuf[off++] = u8Value;
4742
4743 /* rol reg64, 8 */
4744 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4745 pbCodeBuf[off++] = 0xc1;
4746 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4747 pbCodeBuf[off++] = 8;
4748 }
4749
4750#elif defined(RT_ARCH_ARM64)
4751 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4752 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4753 if (iGRegEx < 16)
4754 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4755 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4756 else
4757 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4758 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4759 iemNativeRegFreeTmp(pReNative, idxImmReg);
4760
4761#else
4762# error "Port me!"
4763#endif
4764
4765 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4766
4767#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4768 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4769#endif
4770
4771 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4772 return off;
4773}
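/* The ror/mov/rol sequence above (also used by the variable byte-store below) works around the
   fact that only AL..BL have high-byte aliases (AH..BH) on AMD64 and that those cannot be encoded
   together with a REX prefix. At the value level it computes the following (sketch only,
   hypothetical name, not part of the build): */
#if 0
# include <stdint.h>
static uint64_t iemDemoStoreHighByte(uint64_t uGstReg, uint8_t u8Value)
{
    uGstReg = (uGstReg >> 8) | (uGstReg << 56);            /* ror reg64, 8: bits 15:8 -> 7:0 */
    uGstReg = (uGstReg & ~(uint64_t)0xff) | u8Value;       /* mov reg8, imm8                 */
    return   (uGstReg << 8) | (uGstReg >> 56);             /* rol reg64, 8: restore order    */
}
#endif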
4774
4775
4776#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4777 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4778
4779/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4780DECL_INLINE_THROW(uint32_t)
4781iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4782{
4783 Assert(iGRegEx < 20);
4784 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4785
4786 /*
4787 * If it's a constant value (unlikely) we treat this as a
4788 * IEM_MC_STORE_GREG_U8_CONST statement.
4789 */
4790 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4791 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4792 { /* likely */ }
4793 else
4794 {
4795 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4796 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4797 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4798 }
4799
4800 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4801 kIemNativeGstRegUse_ForUpdate);
4802 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4803
4804#ifdef RT_ARCH_AMD64
4805 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4806 if (iGRegEx < 16)
4807 {
4808 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4809 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4810 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4811 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4812 pbCodeBuf[off++] = X86_OP_REX;
4813 pbCodeBuf[off++] = 0x8a;
4814 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4815 }
4816 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
4817 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4818 {
4819 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4820 pbCodeBuf[off++] = 0x8a;
4821 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4822 }
4823 else
4824 {
4825 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4826
4827 /* ror reg64, 8 */
4828 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4829 pbCodeBuf[off++] = 0xc1;
4830 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4831 pbCodeBuf[off++] = 8;
4832
4833 /* mov reg8, reg8(r/m) */
4834 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4835 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4836 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4837 pbCodeBuf[off++] = X86_OP_REX;
4838 pbCodeBuf[off++] = 0x8a;
4839 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4840
4841 /* rol reg64, 8 */
4842 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4843 pbCodeBuf[off++] = 0xc1;
4844 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4845 pbCodeBuf[off++] = 8;
4846 }
4847
4848#elif defined(RT_ARCH_ARM64)
4849 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4850 or
4851 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4852 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4853 if (iGRegEx < 16)
4854 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4855 else
4856 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4857
4858#else
4859# error "Port me!"
4860#endif
4861 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4862
4863 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4864
4865#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4866 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4867#endif
4868 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4869 return off;
4870}
4871
4872
4873
4874#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4875 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4876
4877/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4878DECL_INLINE_THROW(uint32_t)
4879iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4880{
4881 Assert(iGReg < 16);
4882 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4883 kIemNativeGstRegUse_ForUpdate);
4884#ifdef RT_ARCH_AMD64
4885 /* mov reg16, imm16 */
4886 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4887 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4888 if (idxGstTmpReg >= 8)
4889 pbCodeBuf[off++] = X86_OP_REX_B;
4890 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4891 pbCodeBuf[off++] = RT_BYTE1(uValue);
4892 pbCodeBuf[off++] = RT_BYTE2(uValue);
4893
4894#elif defined(RT_ARCH_ARM64)
4895 /* movk xdst, #uValue, lsl #0 */
4896 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4897 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4898
4899#else
4900# error "Port me!"
4901#endif
4902
4903 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4904
4905#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4906 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4907#endif
4908 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4909 return off;
4910}
4911
4912
4913#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4914 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4915
4916/** Emits code for IEM_MC_STORE_GREG_U16. */
4917DECL_INLINE_THROW(uint32_t)
4918iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4919{
4920 Assert(iGReg < 16);
4921 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4922
4923 /*
4924     * If it's a constant value (unlikely) we treat this as an
4925 * IEM_MC_STORE_GREG_U16_CONST statement.
4926 */
4927 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4928 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4929 { /* likely */ }
4930 else
4931 {
4932 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4933 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4934 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4935 }
4936
4937 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4938 kIemNativeGstRegUse_ForUpdate);
4939
4940#ifdef RT_ARCH_AMD64
4941 /* mov reg16, reg16 or [mem16] */
4942 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4943 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4944 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4945 {
4946 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4947 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
4948 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
4949 pbCodeBuf[off++] = 0x8b;
4950 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
4951 }
4952 else
4953 {
4954 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
4955 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
4956 if (idxGstTmpReg >= 8)
4957 pbCodeBuf[off++] = X86_OP_REX_R;
4958 pbCodeBuf[off++] = 0x8b;
4959 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
4960 }
4961
4962#elif defined(RT_ARCH_ARM64)
4963 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
4964 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4965 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4966 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
4967 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4968
4969#else
4970# error "Port me!"
4971#endif
4972
4973 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4974
4975#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4976 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4977#endif
4978 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4979 return off;
4980}
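
/*
 * Purely illustrative sketch (hypothetical helper, not part of the recompiler):
 * a 16-bit GPR store only replaces bits 15:0 and must preserve bits 63:16,
 * which is why the emitters above allocate the guest register ForUpdate and
 * use an operand-size prefixed mov / movk / bfi instead of a full-width move.
 */
#if 0
static uint64_t iemExampleStoreGRegU16(uint64_t uGstReg, uint16_t uValue)
{
    return (uGstReg & UINT64_C(0xffffffffffff0000)) | uValue;
}
#endif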
4981
4982
4983#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
4984 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
4985
4986/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
4987DECL_INLINE_THROW(uint32_t)
4988iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
4989{
4990 Assert(iGReg < 16);
4991 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4992 kIemNativeGstRegUse_ForFullWrite);
4993 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4994#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4995 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4996#endif
4997 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4998 return off;
4999}
5000
5001
5002#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
5003 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
5004
5005#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
5006 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
5007
5008/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
5009DECL_INLINE_THROW(uint32_t)
5010iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5011{
5012 Assert(iGReg < 16);
5013 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5014
5015 /*
5016     * If it's a constant value (unlikely) we treat this as an
5017 * IEM_MC_STORE_GREG_U32_CONST statement.
5018 */
5019 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5020 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5021 { /* likely */ }
5022 else
5023 {
5024 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5025 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5026 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
5027 }
5028
5029 /*
5030     * For the rest we allocate a guest register for the variable and write
5031 * it to the CPUMCTX structure.
5032 */
5033 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5034#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5035 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5036#else
5037 RT_NOREF(idxVarReg);
5038#endif
5039#ifdef VBOX_STRICT
5040 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
5041#endif
5042 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5043 return off;
5044}
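
/*
 * Purely illustrative sketch (hypothetical helper): in long mode a 32-bit GPR
 * write zero-extends into bits 63:32, so the emitters above can treat this as
 * a full write of the guest register; the strict build merely verifies that
 * the variable's upper 32 bits are already clear.
 */
#if 0
static uint64_t iemExampleStoreGRegU32(uint32_t uValue)
{
    return uValue; /* bits 63:32 become zero */
}
#endif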
5045
5046
5047#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
5048 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
5049
5050/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
5051DECL_INLINE_THROW(uint32_t)
5052iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
5053{
5054 Assert(iGReg < 16);
5055 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5056 kIemNativeGstRegUse_ForFullWrite);
5057 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5058#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5059 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5060#endif
5061 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5062 return off;
5063}
5064
5065
5066#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
5067 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
5068
5069#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
5070 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
5071
5072/** Emits code for IEM_MC_STORE_GREG_U64. */
5073DECL_INLINE_THROW(uint32_t)
5074iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5075{
5076 Assert(iGReg < 16);
5077 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5078
5079 /*
5080     * If it's a constant value (unlikely) we treat this as an
5081 * IEM_MC_STORE_GREG_U64_CONST statement.
5082 */
5083 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5084 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5085 { /* likely */ }
5086 else
5087 {
5088 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5089 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5090 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
5091 }
5092
5093 /*
5094     * For the rest we allocate a guest register for the variable and write
5095 * it to the CPUMCTX structure.
5096 */
5097 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5098#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5099 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5100#else
5101 RT_NOREF(idxVarReg);
5102#endif
5103 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5104 return off;
5105}
5106
5107
5108#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
5109 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
5110
5111/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
5112DECL_INLINE_THROW(uint32_t)
5113iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
5114{
5115 Assert(iGReg < 16);
5116 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5117 kIemNativeGstRegUse_ForUpdate);
5118 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
5119#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5120 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5121#endif
5122 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5123 return off;
5124}
5125
5126
5127#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5128#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
5129 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
5130
5131/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
5132DECL_INLINE_THROW(uint32_t)
5133iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
5134{
5135 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5136 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
5137 Assert(iGRegLo < 16 && iGRegHi < 16);
5138
5139 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
5140 kIemNativeGstRegUse_ForFullWrite);
5141 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
5142 kIemNativeGstRegUse_ForFullWrite);
5143
5144 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5145 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
5146 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
5147 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
5148
5149 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
5150 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
5151 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
5152 return off;
5153}
5154#endif
5155
5156
5157/*********************************************************************************************************************************
5158* General purpose register manipulation (add, sub). *
5159*********************************************************************************************************************************/
5160
5161#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8AddendConst) \
5162    off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8AddendConst)
5163
5164/** Emits code for IEM_MC_ADD_GREG_U16. */
5165DECL_INLINE_THROW(uint32_t)
5166iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
5167{
5168 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5169 kIemNativeGstRegUse_ForUpdate);
5170
5171#ifdef RT_ARCH_AMD64
5172 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5173 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5174 if (idxGstTmpReg >= 8)
5175 pbCodeBuf[off++] = X86_OP_REX_B;
5176 if (uAddend == 1)
5177 {
5178 pbCodeBuf[off++] = 0xff; /* inc */
5179 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5180 }
5181 else
5182 {
5183 pbCodeBuf[off++] = 0x81;
5184 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5185 pbCodeBuf[off++] = uAddend;
5186 pbCodeBuf[off++] = 0;
5187 }
5188
5189#else
5190 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5191 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5192
5193    /* add tmp, gstgrp, uAddend */
5194 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
5195
5196 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
5197 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5198
5199 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5200#endif
5201
5202 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5203
5204#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5205 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5206#endif
5207
5208 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5209 return off;
5210}
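
/*
 * Purely illustrative sketch (hypothetical helper): IEM_MC_ADD_GREG_U16 wraps
 * within 16 bits and leaves bits 63:16 alone, which is what the 0x66-prefixed
 * inc/add on AMD64 and the add-into-temporary plus bfi pair on ARM64 achieve.
 */
#if 0
static uint64_t iemExampleAddGRegU16(uint64_t uGstReg, uint8_t uAddend)
{
    return (uGstReg & UINT64_C(0xffffffffffff0000))
         | (uint16_t)((uint16_t)uGstReg + uAddend);
}
#endif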
5211
5212
5213#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
5214 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5215
5216#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
5217 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5218
5219/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
5220DECL_INLINE_THROW(uint32_t)
5221iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
5222{
5223 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5224 kIemNativeGstRegUse_ForUpdate);
5225
5226#ifdef RT_ARCH_AMD64
5227 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5228 if (f64Bit)
5229 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5230 else if (idxGstTmpReg >= 8)
5231 pbCodeBuf[off++] = X86_OP_REX_B;
5232 if (uAddend == 1)
5233 {
5234 pbCodeBuf[off++] = 0xff; /* inc */
5235 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5236 }
5237 else if (uAddend < 128)
5238 {
5239 pbCodeBuf[off++] = 0x83; /* add */
5240 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5241 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5242 }
5243 else
5244 {
5245 pbCodeBuf[off++] = 0x81; /* add */
5246 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5247 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5248 pbCodeBuf[off++] = 0;
5249 pbCodeBuf[off++] = 0;
5250 pbCodeBuf[off++] = 0;
5251 }
5252
5253#else
5254    /* add gstgrp, gstgrp, uAddend */
5255 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5256 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
5257
5258#endif
5259
5260 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5261
5262#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5263 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5264#endif
5265
5266 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5267 return off;
5268}
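
/*
 * Purely illustrative sketch (hypothetical helper): the uAddend < 128 check
 * above selects the short 0x83 encoding because its imm8 is sign-extended;
 * 0..127 encode the intended addend either way, whereas 128..255 would be
 * interpreted as negative and therefore need the 0x81 form with a full imm32.
 */
#if 0
static bool iemExampleAddendFitsSignExtImm8(uint8_t uAddend)
{
    return (int8_t)uAddend >= 0; /* equivalent to uAddend < 128 */
}
#endif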
5269
5270
5271
5272#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5273 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5274
5275/** Emits code for IEM_MC_SUB_GREG_U16. */
5276DECL_INLINE_THROW(uint32_t)
5277iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5278{
5279 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5280 kIemNativeGstRegUse_ForUpdate);
5281
5282#ifdef RT_ARCH_AMD64
5283 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5284 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5285 if (idxGstTmpReg >= 8)
5286 pbCodeBuf[off++] = X86_OP_REX_B;
5287 if (uSubtrahend == 1)
5288 {
5289 pbCodeBuf[off++] = 0xff; /* dec */
5290 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5291 }
5292 else
5293 {
5294 pbCodeBuf[off++] = 0x81;
5295 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5296 pbCodeBuf[off++] = uSubtrahend;
5297 pbCodeBuf[off++] = 0;
5298 }
5299
5300#else
5301 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5302 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5303
5304 /* sub tmp, gstgrp, uSubtrahend */
5305 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5306
5307 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
5308 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5309
5310 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5311#endif
5312
5313 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5314
5315#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5316 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5317#endif
5318
5319 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5320 return off;
5321}
5322
5323
5324#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5325 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5326
5327#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5328 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5329
5330/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5331DECL_INLINE_THROW(uint32_t)
5332iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5333{
5334 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5335 kIemNativeGstRegUse_ForUpdate);
5336
5337#ifdef RT_ARCH_AMD64
5338 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5339 if (f64Bit)
5340 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5341 else if (idxGstTmpReg >= 8)
5342 pbCodeBuf[off++] = X86_OP_REX_B;
5343 if (uSubtrahend == 1)
5344 {
5345 pbCodeBuf[off++] = 0xff; /* dec */
5346 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5347 }
5348 else if (uSubtrahend < 128)
5349 {
5350 pbCodeBuf[off++] = 0x83; /* sub */
5351 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5352 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5353 }
5354 else
5355 {
5356 pbCodeBuf[off++] = 0x81; /* sub */
5357 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5358 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5359 pbCodeBuf[off++] = 0;
5360 pbCodeBuf[off++] = 0;
5361 pbCodeBuf[off++] = 0;
5362 }
5363
5364#else
5365    /* sub gstgrp, gstgrp, uSubtrahend */
5366 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5367 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
5368
5369#endif
5370
5371 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5372
5373#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5374 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5375#endif
5376
5377 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5378 return off;
5379}
5380
5381
5382#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
5383 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5384
5385#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
5386 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5387
5388#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
5389 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5390
5391#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
5392 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5393
5394/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
5395DECL_INLINE_THROW(uint32_t)
5396iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5397{
5398#ifdef VBOX_STRICT
5399 switch (cbMask)
5400 {
5401 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5402 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5403 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5404 case sizeof(uint64_t): break;
5405 default: AssertFailedBreak();
5406 }
5407#endif
5408
5409 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5410 kIemNativeGstRegUse_ForUpdate);
5411
5412 switch (cbMask)
5413 {
5414 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5415 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
5416 break;
5417 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
5418 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
5419 break;
5420 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5421 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5422 break;
5423 case sizeof(uint64_t):
5424 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
5425 break;
5426 default: AssertFailedBreak();
5427 }
5428
5429 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5430
5431#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5432 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5433#endif
5434
5435 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5436 return off;
5437}
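
/*
 * Purely illustrative sketch (hypothetical helper): for the 8-bit and 16-bit
 * cases the emitter widens the mask with all-ones above the operand size, so
 * a single full-width AND clears only the requested bits and leaves the rest
 * of the guest register intact.
 */
#if 0
static uint64_t iemExampleAndGRegU16(uint64_t uGstReg, uint16_t uMask)
{
    return uGstReg & (UINT64_C(0xffffffffffff0000) | uMask);
}
#endif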
5438
5439
5440#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
5441 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5442
5443#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
5444 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5445
5446#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
5447 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5448
5449#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
5450 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5451
5452/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
5453DECL_INLINE_THROW(uint32_t)
5454iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5455{
5456#ifdef VBOX_STRICT
5457 switch (cbMask)
5458 {
5459 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5460 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5461 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5462 case sizeof(uint64_t): break;
5463 default: AssertFailedBreak();
5464 }
5465#endif
5466
5467 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5468 kIemNativeGstRegUse_ForUpdate);
5469
5470 switch (cbMask)
5471 {
5472 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5473 case sizeof(uint16_t):
5474 case sizeof(uint64_t):
5475 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
5476 break;
5477 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5478 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5479 break;
5480 default: AssertFailedBreak();
5481 }
5482
5483 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5484
5485#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5486 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5487#endif
5488
5489 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5490 return off;
5491}
5492
5493
5494/*********************************************************************************************************************************
5495* Local/Argument variable manipulation (add, sub, and, or). *
5496*********************************************************************************************************************************/
5497
5498#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
5499 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5500
5501#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
5502 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5503
5504#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
5505 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5506
5507#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
5508 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5509
5510
5511#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
5512 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
5513
5514#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
5515 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
5516
5517#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
5518 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
5519
5520/** Emits code for AND'ing a local and a constant value. */
5521DECL_INLINE_THROW(uint32_t)
5522iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5523{
5524#ifdef VBOX_STRICT
5525 switch (cbMask)
5526 {
5527 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5528 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5529 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5530 case sizeof(uint64_t): break;
5531 default: AssertFailedBreak();
5532 }
5533#endif
5534
5535 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5536 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5537
5538 if (cbMask <= sizeof(uint32_t))
5539 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
5540 else
5541 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
5542
5543 iemNativeVarRegisterRelease(pReNative, idxVar);
5544 return off;
5545}
5546
5547
5548#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
5549 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5550
5551#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
5552 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5553
5554#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
5555 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5556
5557#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
5558 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5559
5560/** Emits code for OR'ing a local and a constant value. */
5561DECL_INLINE_THROW(uint32_t)
5562iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5563{
5564#ifdef VBOX_STRICT
5565 switch (cbMask)
5566 {
5567 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5568 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5569 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5570 case sizeof(uint64_t): break;
5571 default: AssertFailedBreak();
5572 }
5573#endif
5574
5575 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5576 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5577
5578 if (cbMask <= sizeof(uint32_t))
5579 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
5580 else
5581 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
5582
5583 iemNativeVarRegisterRelease(pReNative, idxVar);
5584 return off;
5585}
5586
5587
5588#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
5589 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
5590
5591#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
5592 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
5593
5594#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
5595 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
5596
5597/** Emits code for reversing the byte order in a local value. */
5598DECL_INLINE_THROW(uint32_t)
5599iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
5600{
5601 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5602 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5603
5604 switch (cbLocal)
5605 {
5606 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
5607 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
5608 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
5609 default: AssertFailedBreak();
5610 }
5611
5612 iemNativeVarRegisterRelease(pReNative, idxVar);
5613 return off;
5614}
5615
5616
5617#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
5618 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5619
5620#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
5621 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5622
5623#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
5624 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5625
5626/** Emits code for shifting left a local value. */
5627DECL_INLINE_THROW(uint32_t)
5628iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5629{
5630#ifdef VBOX_STRICT
5631 switch (cbLocal)
5632 {
5633 case sizeof(uint8_t): Assert(cShift < 8); break;
5634 case sizeof(uint16_t): Assert(cShift < 16); break;
5635 case sizeof(uint32_t): Assert(cShift < 32); break;
5636 case sizeof(uint64_t): Assert(cShift < 64); break;
5637 default: AssertFailedBreak();
5638 }
5639#endif
5640
5641 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5642 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5643
5644 if (cbLocal <= sizeof(uint32_t))
5645 {
5646 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
5647 if (cbLocal < sizeof(uint32_t))
5648 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
5649 cbLocal == sizeof(uint16_t)
5650 ? UINT32_C(0xffff)
5651 : UINT32_C(0xff));
5652 }
5653 else
5654 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
5655
5656 iemNativeVarRegisterRelease(pReNative, idxVar);
5657 return off;
5658}
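
/*
 * Purely illustrative sketch (hypothetical helper): shifting an 8-bit or
 * 16-bit local inside a 32-bit host register can carry bits beyond the
 * local's width, so the emitter above masks the result back down afterwards.
 */
#if 0
static uint16_t iemExampleShlLocal16(uint16_t uLocal, uint8_t cShift)
{
    return (uint16_t)(((uint32_t)uLocal << cShift) & UINT32_C(0xffff));
}
#endif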
5659
5660
5661#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
5662 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5663
5664#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
5665 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5666
5667#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
5668 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5669
5670/** Emits code for arithmetically shifting a local value to the right. */
5671DECL_INLINE_THROW(uint32_t)
5672iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5673{
5674#ifdef VBOX_STRICT
5675 switch (cbLocal)
5676 {
5677 case sizeof(int8_t): Assert(cShift < 8); break;
5678 case sizeof(int16_t): Assert(cShift < 16); break;
5679 case sizeof(int32_t): Assert(cShift < 32); break;
5680 case sizeof(int64_t): Assert(cShift < 64); break;
5681 default: AssertFailedBreak();
5682 }
5683#endif
5684
5685 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5686 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5687
5688 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
5689 if (cbLocal == sizeof(uint8_t))
5690 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
5691 else if (cbLocal == sizeof(uint16_t))
5692 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
5693
5694 if (cbLocal <= sizeof(uint32_t))
5695 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
5696 else
5697 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
5698
5699 iemNativeVarRegisterRelease(pReNative, idxVar);
5700 return off;
5701}
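
/*
 * Purely illustrative sketch (hypothetical helper): an 8-bit or 16-bit
 * arithmetic right shift performed in a 32-bit host register must first
 * sign-extend the value so the replicated sign bit comes from the local's own
 * MSB; that is what the LoadGpr32SignExtendedFromGpr8/16 calls above do.
 */
#if 0
static int16_t iemExampleSarLocalS16(int16_t iLocal, uint8_t cShift)
{
    return (int16_t)((int32_t)iLocal >> cShift); /* sign-extend first, then shift */
}
#endif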
5702
5703
5704#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
5705 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
5706
5707#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
5708 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
5709
5710#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
5711 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
5712
5713/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
5714DECL_INLINE_THROW(uint32_t)
5715iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
5716{
5717 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
5718 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
5719 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5720 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5721
5722 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5723 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
5724
5725 /* Need to sign extend the value. */
5726 if (cbLocal <= sizeof(uint32_t))
5727 {
5728/** @todo ARM64: In case of boredom, the extended add instruction can do the
5729 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5730 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5731
5732 switch (cbLocal)
5733 {
5734 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5735 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5736 default: AssertFailed();
5737 }
5738
5739 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5740 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5741 }
5742 else
5743 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5744
5745 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5746 iemNativeVarRegisterRelease(pReNative, idxVar);
5747 return off;
5748}
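
/*
 * Purely illustrative sketch (hypothetical helper): the 16-bit or 32-bit local
 * is treated as a signed displacement, so it gets sign-extended to 64 bits
 * before being added to the effective address, matching the temporary-register
 * path above.
 */
#if 0
static uint64_t iemExampleAddLocalS32ToEffAddr(uint64_t GCPtrEff, int32_t i32Local)
{
    return GCPtrEff + (uint64_t)(int64_t)i32Local;
}
#endif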
5749
5750
5751
5752/*********************************************************************************************************************************
5753* EFLAGS *
5754*********************************************************************************************************************************/
5755
5756#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5757# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5758#else
5759# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5760 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5761
5762DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5763{
5764 if (fEflOutput)
5765 {
5766 PVMCPUCC const pVCpu = pReNative->pVCpu;
5767# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5768 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5769 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5770 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5771# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5772 if (fEflOutput & (a_fEfl)) \
5773 { \
5774 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5775 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5776 else \
5777 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5778 } else do { } while (0)
5779# else
5780 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5781 IEMLIVENESSBIT const LivenessClobbered =
5782 {
5783 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5784 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5785 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5786 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5787 };
5788 IEMLIVENESSBIT const LivenessDelayable =
5789 {
5790 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5791 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5792 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5793 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5794 };
5795# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5796 if (fEflOutput & (a_fEfl)) \
5797 { \
5798 if (LivenessClobbered.a_fLivenessMember) \
5799 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5800 else if (LivenessDelayable.a_fLivenessMember) \
5801 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5802 else \
5803 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5804 } else do { } while (0)
5805# endif
5806 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5807 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5808 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5809 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5810 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5811 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5812 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5813# undef CHECK_FLAG_AND_UPDATE_STATS
5814 }
5815 RT_NOREF(fEflInput);
5816}
5817#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5818
5819#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5820#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5821 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
5822
5823/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5824DECL_INLINE_THROW(uint32_t)
5825iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
5826 uint32_t fEflInput, uint32_t fEflOutput)
5827{
5828 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5829 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5830 RT_NOREF(fEflInput, fEflOutput);
5831
5832#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5833# ifdef VBOX_STRICT
5834 if ( pReNative->idxCurCall != 0
5835 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
5836 {
5837 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5838 uint32_t const fBoth = fEflInput | fEflOutput;
5839# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5840 AssertMsg( !(fBoth & (a_fElfConst)) \
5841 || (!(fEflInput & (a_fElfConst)) \
5842 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5843 : !(fEflOutput & (a_fElfConst)) \
5844 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5845 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5846 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5847 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5848 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5849 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5850 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5851 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5852 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5853 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5854# undef ASSERT_ONE_EFL
5855 }
5856# endif
5857#endif
5858
5859 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5860
5861    /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
5862 * the existing shadow copy. */
5863 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
5864 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5865 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5866 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5867 return off;
5868}
5869
5870
5871
5872/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5873 * start using it with custom native code emission (inlining assembly
5874 * instruction helpers). */
5875#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5876#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5877 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5878 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
5879
5880#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5881#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5882 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5883 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
5884
5885/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5886DECL_INLINE_THROW(uint32_t)
5887iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
5888 bool fUpdateSkipping)
5889{
5890 RT_NOREF(fEflOutput);
5891 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
5892 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5893
5894#ifdef VBOX_STRICT
5895 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5896 uint32_t offFixup = off;
5897 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5898 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5899 iemNativeFixupFixedJump(pReNative, offFixup, off);
5900
5901 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5902 offFixup = off;
5903 off = iemNativeEmitJzToFixed(pReNative, off, off);
5904 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
5905 iemNativeFixupFixedJump(pReNative, offFixup, off);
5906
5907    /** @todo validate that only bits in the fEflOutput mask changed. */
5908#endif
5909
5910#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5911 if (fUpdateSkipping)
5912 {
5913 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5914 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5915 else
5916 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5917 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5918 }
5919#else
5920 RT_NOREF_PV(fUpdateSkipping);
5921#endif
5922
5923 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5924 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
5925 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5926 return off;
5927}
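
/*
 * Purely illustrative sketch (hypothetical helper): the strict-build checks
 * above only accept architecturally plausible values: bit 1 of EFLAGS
 * (X86_EFL_RA1_MASK) must read as one and the reserved read-as-zero bits
 * covered by the hardware mask must be clear.
 */
#if 0
static bool iemExampleIsPlausibleEFlags(uint32_t fEfl)
{
    return (fEfl & X86_EFL_RA1_MASK)
        && !(fEfl & X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
}
#endif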
5928
5929
5930typedef enum IEMNATIVEMITEFLOP
5931{
5932 kIemNativeEmitEflOp_Invalid = 0,
5933 kIemNativeEmitEflOp_Set,
5934 kIemNativeEmitEflOp_Clear,
5935 kIemNativeEmitEflOp_Flip
5936} IEMNATIVEMITEFLOP;
5937
5938#define IEM_MC_SET_EFL_BIT(a_fBit) \
5939 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Set);
5940
5941#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
5942 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Clear);
5943
5944#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
5945 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Flip);
5946
5947/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
5948DECL_INLINE_THROW(uint32_t)
5949iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
5950{
5951 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5952 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/);
5953
5954 switch (enmOp)
5955 {
5956 case kIemNativeEmitEflOp_Set:
5957 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5958 break;
5959 case kIemNativeEmitEflOp_Clear:
5960 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
5961 break;
5962 case kIemNativeEmitEflOp_Flip:
5963 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5964 break;
5965 default:
5966 AssertFailed();
5967 break;
5968 }
5969
5970 /** @todo No delayed writeback for EFLAGS right now. */
5971 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5972
5973 /* Free but don't flush the EFLAGS register. */
5974 iemNativeRegFreeTmp(pReNative, idxEflReg);
5975
5976 return off;
5977}
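
/*
 * Purely illustrative sketch (hypothetical helper): the three operations map
 * onto the usual bit tricks, i.e. OR to set, AND with the complement to clear
 * and XOR to flip, applied to the cached EFLAGS value.
 */
#if 0
static uint32_t iemExampleModifyEFlagsBit(uint32_t fEfl, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
{
    switch (enmOp)
    {
        case kIemNativeEmitEflOp_Set:   return fEfl | fEflBit;
        case kIemNativeEmitEflOp_Clear: return fEfl & ~fEflBit;
        default:                        return fEfl ^ fEflBit; /* flip */
    }
}
#endif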
5978
5979
5980/*********************************************************************************************************************************
5981* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
5982*********************************************************************************************************************************/
5983
5984#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
5985 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
5986
5987#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
5988 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
5989
5990#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
5991 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
5992
5993
5994/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
5995 * IEM_MC_FETCH_SREG_ZX_U64. */
5996DECL_INLINE_THROW(uint32_t)
5997iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
5998{
5999 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6000 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
6001 Assert(iSReg < X86_SREG_COUNT);
6002
6003 /*
6004     * For now, we will not create a shadow copy of a selector. The rationale
6005 * is that since we do not recompile the popping and loading of segment
6006     * registers and that the IEM_MC_FETCH_SREG_U* MCs are only used for
6007 * pushing and moving to registers, there is only a small chance that the
6008 * shadow copy will be accessed again before the register is reloaded. One
6009     * scenario would be nested calls in 16-bit code, but I doubt it's worth
6010 * the extra register pressure atm.
6011 *
6012 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
6013     * and iemNativeVarRegisterAcquire for a load scenario. We've only got the
6014     * store scenario covered at present (r160730).
6015 */
6016 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6017 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6018 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
6019 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6020 return off;
6021}
6022
6023
6024
6025/*********************************************************************************************************************************
6026* Register references. *
6027*********************************************************************************************************************************/
6028
6029#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
6030 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
6031
6032#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
6033 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
6034
6035/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
6036DECL_INLINE_THROW(uint32_t)
6037iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
6038{
6039 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
6040 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6041 Assert(iGRegEx < 20);
6042
6043 if (iGRegEx < 16)
6044 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6045 else
6046 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
6047
6048 /* If we've delayed writing back the register value, flush it now. */
6049 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6050
6051 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6052 if (!fConst)
6053 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
6054
6055 return off;
6056}
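
/*
 * Purely illustrative sketch (hypothetical helper): the extended register
 * index used by the *_THREADED variants packs the four high-byte registers
 * after the sixteen GPRs, i.e. 0..15 address the GPRs themselves and 16..19
 * mean AH, CH, DH and BH, which is why the code above tests iGRegEx < 16 and
 * masks with 15 to recover the underlying GPR.
 */
#if 0
static void iemExampleDecodeGRegEx(uint8_t iGRegEx, uint8_t *piGReg, bool *pfHighByte)
{
    Assert(iGRegEx < 20);
    *piGReg     = iGRegEx & 15;  /* 16..19 map back onto GPRs 0..3 */
    *pfHighByte = iGRegEx >= 16; /* AH, CH, DH, BH */
}
#endif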
6057
6058#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
6059 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
6060
6061#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
6062 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
6063
6064#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
6065 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
6066
6067#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
6068 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
6069
6070#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
6071 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
6072
6073#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
6074 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
6075
6076#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
6077 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
6078
6079#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
6080 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
6081
6082#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
6083 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
6084
6085#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
6086 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
6087
6088/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
6089DECL_INLINE_THROW(uint32_t)
6090iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
6091{
6092 Assert(iGReg < 16);
6093 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
6094 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6095
6096 /* If we've delayed writing back the register value, flush it now. */
6097 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
6098
6099 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6100 if (!fConst)
6101 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
6102
6103 return off;
6104}
6105
6106
6107#undef IEM_MC_REF_EFLAGS /* should not be used. */
6108#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
6109 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
6110 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
6111
6112/** Handles IEM_MC_REF_EFLAGS. */
6113DECL_INLINE_THROW(uint32_t)
6114iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
6115{
6116 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
6117 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6118
6119#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6120 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
6121
6122 /* Updating the skipping according to the outputs is a little early, but
6123 we don't have any other hooks for references atm. */
6124 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6125 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6126 else if (fEflOutput & X86_EFL_STATUS_BITS)
6127 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
6128 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6129#else
6130 RT_NOREF(fEflInput, fEflOutput);
6131#endif
6132
6133 /* If we've delayed writing back the register value, flush it now. */
6134 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
6135
6136 /* If there is a shadow copy of guest EFLAGS, flush it now. */
6137 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
6138
6139 return off;
6140}
6141
6142
6143/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
6144 * different code from the threaded recompiler, maybe it would be helpful. For now
6145 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
6146#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
6147
6148
6149#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
6150 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
6151
6152#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
6153 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
6154
6155#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
6156 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
6157
6158#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
6159 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
6160
6161#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6162/* Just being paranoid here. */
6163# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
6164AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
6165AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
6166AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
6167AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
6168# endif
6169AssertCompileMemberOffset(X86XMMREG, au64, 0);
6170AssertCompileMemberOffset(X86XMMREG, au32, 0);
6171AssertCompileMemberOffset(X86XMMREG, ar64, 0);
6172AssertCompileMemberOffset(X86XMMREG, ar32, 0);
6173
6174# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
6175 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
6176# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
6177 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
6178# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
6179 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
6180# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
6181 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
6182#endif
6183
6184/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
6185DECL_INLINE_THROW(uint32_t)
6186iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
6187{
6188 Assert(iXReg < 16);
6189 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
6190 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6191
6192 /* If we've delayed writing back the register value, flush it now. */
6193 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
6194
6195#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6196 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6197 if (!fConst)
6198 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
6199#else
6200 RT_NOREF(fConst);
6201#endif
6202
6203 return off;
6204}
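
/*
 * Illustration (not part of the build): a statement like IEM_MC_REF_XREG_U128(pu128Dst, 2)
 * in an MC block simply expands, via the macros above, to
 *      off = iemNativeEmitRefXregXxx(pReNative, off, pu128Dst, 2, false);
 * i.e. the variable becomes a guest register reference and any delayed write or shadow
 * copy of XMM2 is flushed first, so the reference observes (and may modify) the CPUMCTX value.
 */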
6205
6206
6207
6208/*********************************************************************************************************************************
6209* Effective Address Calculation *
6210*********************************************************************************************************************************/
6211#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
6212 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
6213
6214/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
6215 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
6216DECL_INLINE_THROW(uint32_t)
6217iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6218 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
6219{
6220 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6221
6222 /*
6223 * Handle the disp16 form with no registers first.
6224 *
6225 * Convert to an immediate value, as that'll delay the register allocation
6226 * and assignment till the memory access / call / whatever and we can use
6227 * a more appropriate register (or none at all).
6228 */
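    /* (E.g. 'mov ax, [1234h]' -- mod=0, r/m=6 -- is handled by the check below: GCPtrEff
        simply becomes the constant 0x1234 and no guest GPR needs to be read at all.) */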
6229 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
6230 {
6231 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
6232 return off;
6233 }
6234
6235    /* Determine the displacement. */
6236 uint16_t u16EffAddr;
6237 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6238 {
6239 case 0: u16EffAddr = 0; break;
6240 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
6241 case 2: u16EffAddr = u16Disp; break;
6242 default: AssertFailedStmt(u16EffAddr = 0);
6243 }
6244
6245 /* Determine the registers involved. */
6246 uint8_t idxGstRegBase;
6247 uint8_t idxGstRegIndex;
6248 switch (bRm & X86_MODRM_RM_MASK)
6249 {
6250 case 0:
6251 idxGstRegBase = X86_GREG_xBX;
6252 idxGstRegIndex = X86_GREG_xSI;
6253 break;
6254 case 1:
6255 idxGstRegBase = X86_GREG_xBX;
6256 idxGstRegIndex = X86_GREG_xDI;
6257 break;
6258 case 2:
6259 idxGstRegBase = X86_GREG_xBP;
6260 idxGstRegIndex = X86_GREG_xSI;
6261 break;
6262 case 3:
6263 idxGstRegBase = X86_GREG_xBP;
6264 idxGstRegIndex = X86_GREG_xDI;
6265 break;
6266 case 4:
6267 idxGstRegBase = X86_GREG_xSI;
6268 idxGstRegIndex = UINT8_MAX;
6269 break;
6270 case 5:
6271 idxGstRegBase = X86_GREG_xDI;
6272 idxGstRegIndex = UINT8_MAX;
6273 break;
6274 case 6:
6275 idxGstRegBase = X86_GREG_xBP;
6276 idxGstRegIndex = UINT8_MAX;
6277 break;
6278#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
6279 default:
6280#endif
6281 case 7:
6282 idxGstRegBase = X86_GREG_xBX;
6283 idxGstRegIndex = UINT8_MAX;
6284 break;
6285 }
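    /* (For reference, the standard 16-bit r/m mapping: 0=BX+SI, 1=BX+DI, 2=BP+SI, 3=BP+DI,
        4=SI, 5=DI, 6=BP -- the mod=0 disp16 form was handled above -- and 7=BX.) */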
6286
6287 /*
6288 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
6289 */
6290 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6291 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6292 kIemNativeGstRegUse_ReadOnly);
6293 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
6294 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6295 kIemNativeGstRegUse_ReadOnly)
6296 : UINT8_MAX;
6297#ifdef RT_ARCH_AMD64
6298 if (idxRegIndex == UINT8_MAX)
6299 {
6300 if (u16EffAddr == 0)
6301 {
6302 /* movxz ret, base */
6303 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
6304 }
6305 else
6306 {
6307 /* lea ret32, [base64 + disp32] */
6308 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6309 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6310 if (idxRegRet >= 8 || idxRegBase >= 8)
6311 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6312 pbCodeBuf[off++] = 0x8d;
6313 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6314 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
6315 else
6316 {
6317 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
6318 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6319 }
6320 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6321 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6322 pbCodeBuf[off++] = 0;
6323 pbCodeBuf[off++] = 0;
6324 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6325
6326 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6327 }
6328 }
6329 else
6330 {
6331 /* lea ret32, [index64 + base64 (+ disp32)] */
6332 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6333 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6334 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6335 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6336 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6337 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6338 pbCodeBuf[off++] = 0x8d;
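        /* Note: mod=0 with a base of rBP/r13 would decode as disp32-only (no base register),
           so such bases must use a displacement form even when the displacement is zero. */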
6339 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
6340 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6341 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
6342 if (bMod == X86_MOD_MEM4)
6343 {
6344 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6345 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6346 pbCodeBuf[off++] = 0;
6347 pbCodeBuf[off++] = 0;
6348 }
6349 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6350 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6351 }
6352
6353#elif defined(RT_ARCH_ARM64)
6354 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6355 if (u16EffAddr == 0)
6356 {
6357 if (idxRegIndex == UINT8_MAX)
6358 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
6359 else
6360 {
6361 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
6362 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6363 }
6364 }
6365 else
6366 {
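        /* The A64 ADD/SUB (immediate) encoding only takes a 12-bit unsigned immediate
           (0..4095), hence the add / subtract-of-negated / movz+add split below. */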
6367 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
6368 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
6369 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
6370 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6371 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
6372 else
6373 {
6374 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
6375 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6376 }
6377 if (idxRegIndex != UINT8_MAX)
6378 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
6379 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6380 }
6381
6382#else
6383# error "port me"
6384#endif
6385
6386 if (idxRegIndex != UINT8_MAX)
6387 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6388 iemNativeRegFreeTmp(pReNative, idxRegBase);
6389 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6390 return off;
6391}
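
/*
 * Illustrative sketch only (not built): a plain C model of what the code emitted by the
 * function above computes for the 16-bit addressing forms.  The helper name and parameters
 * are invented for the sketch; the threaded counterpart is iemOpHlpCalcRmEffAddrThreadedAddr16.
 */
#if 0
static uint16_t iemNativeSketchEffAddr16(uint16_t uBase, uint16_t uIndex, uint16_t uDisp)
{
    /* Base, optional index and displacement are summed and the result wraps at 64 KiB,
       which is why the emitted code ends with a 16-bit zero-extension (movzx / uxth). */
    return (uint16_t)(uBase + uIndex + uDisp);
}
#endif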
6392
6393
6394#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
6395 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
6396
6397/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
6398 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
6399DECL_INLINE_THROW(uint32_t)
6400iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6401 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
6402{
6403 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6404
6405 /*
6406 * Handle the disp32 form with no registers first.
6407 *
6408 * Convert to an immediate value, as that'll delay the register allocation
6409 * and assignment till the memory access / call / whatever and we can use
6410 * a more appropriate register (or none at all).
6411 */
6412 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6413 {
6414 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
6415 return off;
6416 }
6417
6418    /* Calculate the fixed displacement (more on this below in the SIB.B=4 and SIB.B=5 handling). */
6419 uint32_t u32EffAddr = 0;
6420 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6421 {
6422 case 0: break;
6423 case 1: u32EffAddr = (int8_t)u32Disp; break;
6424 case 2: u32EffAddr = u32Disp; break;
6425 default: AssertFailed();
6426 }
6427
6428 /* Get the register (or SIB) value. */
6429 uint8_t idxGstRegBase = UINT8_MAX;
6430 uint8_t idxGstRegIndex = UINT8_MAX;
6431 uint8_t cShiftIndex = 0;
6432 switch (bRm & X86_MODRM_RM_MASK)
6433 {
6434 case 0: idxGstRegBase = X86_GREG_xAX; break;
6435 case 1: idxGstRegBase = X86_GREG_xCX; break;
6436 case 2: idxGstRegBase = X86_GREG_xDX; break;
6437 case 3: idxGstRegBase = X86_GREG_xBX; break;
6438 case 4: /* SIB */
6439 {
6440            /* index w/ scaling. */
6441 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6442 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6443 {
6444 case 0: idxGstRegIndex = X86_GREG_xAX; break;
6445 case 1: idxGstRegIndex = X86_GREG_xCX; break;
6446 case 2: idxGstRegIndex = X86_GREG_xDX; break;
6447 case 3: idxGstRegIndex = X86_GREG_xBX; break;
6448 case 4: cShiftIndex = 0; /*no index*/ break;
6449 case 5: idxGstRegIndex = X86_GREG_xBP; break;
6450 case 6: idxGstRegIndex = X86_GREG_xSI; break;
6451 case 7: idxGstRegIndex = X86_GREG_xDI; break;
6452 }
6453
6454 /* base */
6455 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
6456 {
6457 case 0: idxGstRegBase = X86_GREG_xAX; break;
6458 case 1: idxGstRegBase = X86_GREG_xCX; break;
6459 case 2: idxGstRegBase = X86_GREG_xDX; break;
6460 case 3: idxGstRegBase = X86_GREG_xBX; break;
6461 case 4:
6462 idxGstRegBase = X86_GREG_xSP;
6463 u32EffAddr += uSibAndRspOffset >> 8;
6464 break;
6465 case 5:
6466 if ((bRm & X86_MODRM_MOD_MASK) != 0)
6467 idxGstRegBase = X86_GREG_xBP;
6468 else
6469 {
6470 Assert(u32EffAddr == 0);
6471 u32EffAddr = u32Disp;
6472 }
6473 break;
6474 case 6: idxGstRegBase = X86_GREG_xSI; break;
6475 case 7: idxGstRegBase = X86_GREG_xDI; break;
6476 }
6477 break;
6478 }
6479 case 5: idxGstRegBase = X86_GREG_xBP; break;
6480 case 6: idxGstRegBase = X86_GREG_xSI; break;
6481 case 7: idxGstRegBase = X86_GREG_xDI; break;
6482 }
6483
6484 /*
6485 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6486 * the start of the function.
6487 */
6488 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6489 {
6490 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
6491 return off;
6492 }
6493
6494 /*
6495 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6496 */
6497 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6498 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6499 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6500 kIemNativeGstRegUse_ReadOnly);
6501 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6502 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6503 kIemNativeGstRegUse_ReadOnly);
6504
6505 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6506 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6507 {
6508 idxRegBase = idxRegIndex;
6509 idxRegIndex = UINT8_MAX;
6510 }
6511
6512#ifdef RT_ARCH_AMD64
6513 if (idxRegIndex == UINT8_MAX)
6514 {
6515 if (u32EffAddr == 0)
6516 {
6517 /* mov ret, base */
6518 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6519 }
6520 else
6521 {
6522 /* lea ret32, [base64 + disp32] */
6523 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6524 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6525 if (idxRegRet >= 8 || idxRegBase >= 8)
6526 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6527 pbCodeBuf[off++] = 0x8d;
6528 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6529 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6530 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6531 else
6532 {
6533 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6534 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6535 }
6536 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6537 if (bMod == X86_MOD_MEM4)
6538 {
6539 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6540 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6541 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6542 }
6543 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6544 }
6545 }
6546 else
6547 {
6548 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6549 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6550 if (idxRegBase == UINT8_MAX)
6551 {
6552 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
6553 if (idxRegRet >= 8 || idxRegIndex >= 8)
6554 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6555 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6556 pbCodeBuf[off++] = 0x8d;
6557 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6558 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6559 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6560 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6561 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6562 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6563 }
6564 else
6565 {
6566 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6567 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6568 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6569 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6570 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6571 pbCodeBuf[off++] = 0x8d;
6572 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6573 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6574 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6575 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6576 if (bMod != X86_MOD_MEM0)
6577 {
6578 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6579 if (bMod == X86_MOD_MEM4)
6580 {
6581 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6582 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6583 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6584 }
6585 }
6586 }
6587 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6588 }
6589
6590#elif defined(RT_ARCH_ARM64)
6591 if (u32EffAddr == 0)
6592 {
6593 if (idxRegIndex == UINT8_MAX)
6594 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6595 else if (idxRegBase == UINT8_MAX)
6596 {
6597 if (cShiftIndex == 0)
6598 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
6599 else
6600 {
6601 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6602 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
6603 }
6604 }
6605 else
6606 {
6607 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6608 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6609 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6610 }
6611 }
6612 else
6613 {
6614 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
6615 {
6616 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6617 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
6618 }
6619 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
6620 {
6621 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6622 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6623 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
6624 }
6625 else
6626 {
6627 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
6628 if (idxRegBase != UINT8_MAX)
6629 {
6630 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6631 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6632 }
6633 }
6634 if (idxRegIndex != UINT8_MAX)
6635 {
6636 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6637 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6638 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6639 }
6640 }
6641
6642#else
6643# error "port me"
6644#endif
6645
6646 if (idxRegIndex != UINT8_MAX)
6647 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6648 if (idxRegBase != UINT8_MAX)
6649 iemNativeRegFreeTmp(pReNative, idxRegBase);
6650 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6651 return off;
6652}
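
/*
 * Illustrative sketch only (not built): a plain C model of the 32-bit effective address
 * the code emitted above calculates.  Invented names; the threaded counterpart is
 * iemOpHlpCalcRmEffAddrThreadedAddr32.
 */
#if 0
static uint32_t iemNativeSketchEffAddr32(uint32_t uBase, uint32_t uIndex, uint8_t cShiftIndex, uint32_t uDisp)
{
    /* base + (index << scale) + disp, with the sum truncated to 32 bits (address size wrap). */
    return uBase + (uIndex << cShiftIndex) + uDisp;
}
#endif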
6653
6654
6655#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6656 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6657 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6658
6659#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6660 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6661 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6662
6663#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6664 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6665 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
6666
6667/**
6668 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
6669 *
6670 * @returns New off.
6671 * @param pReNative        The native recompiler state.
6672 * @param off              The current code buffer offset.
6673 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
6674 * bit 4 to REX.X. The two bits are part of the
6675 * REG sub-field, which isn't needed in this
6676 * function.
6677 * @param uSibAndRspOffset Two parts:
6678 * - The first 8 bits make up the SIB byte.
6679 * - The next 8 bits are the fixed RSP/ESP offset
6680 * in case of a pop [xSP].
6681 * @param u32Disp The displacement byte/word/dword, if any.
6682 * @param cbInstr The size of the fully decoded instruction. Used
6683 * for RIP relative addressing.
6684 * @param idxVarRet The result variable number.
6685 * @param f64Bit Whether to use a 64-bit or 32-bit address size
6686 * when calculating the address.
6687 *
6688 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
6689 */
6690DECL_INLINE_THROW(uint32_t)
6691iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
6692 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
6693{
6694 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6695
6696 /*
6697 * Special case the rip + disp32 form first.
6698 */
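    /* (RIP-relative operands are addressed relative to the end of the instruction, i.e. the
        RIP of the next instruction, which is why cbInstr -- and, with delayed PC updating,
        offPc -- gets added to the PC register value below.) */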
6699 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6700 {
6701 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6702 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6703 kIemNativeGstRegUse_ReadOnly);
6704 if (f64Bit)
6705 {
6706#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6707 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr + (int64_t)pReNative->Core.offPc;
6708#else
6709 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
6710#endif
6711#ifdef RT_ARCH_AMD64
6712 if ((int32_t)offFinalDisp == offFinalDisp)
6713 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
6714 else
6715 {
6716 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
6717 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
6718 }
6719#else
6720 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6721#endif
6722 }
6723 else
6724 {
6725# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6726 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr + (int32_t)pReNative->Core.offPc;
6727# else
6728 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr;
6729# endif
6730 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6731 }
6732 iemNativeRegFreeTmp(pReNative, idxRegPc);
6733 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6734 return off;
6735 }
6736
6737    /* Calculate the fixed displacement (more on this below in the SIB.B=4 and SIB.B=5 handling). */
6738 int64_t i64EffAddr = 0;
6739 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6740 {
6741 case 0: break;
6742 case 1: i64EffAddr = (int8_t)u32Disp; break;
6743 case 2: i64EffAddr = (int32_t)u32Disp; break;
6744 default: AssertFailed();
6745 }
6746
6747 /* Get the register (or SIB) value. */
6748 uint8_t idxGstRegBase = UINT8_MAX;
6749 uint8_t idxGstRegIndex = UINT8_MAX;
6750 uint8_t cShiftIndex = 0;
6751 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6752 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6753 else /* SIB: */
6754 {
6755        /* index w/ scaling. */
6756 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6757 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6758 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6759 if (idxGstRegIndex == 4)
6760 {
6761 /* no index */
6762 cShiftIndex = 0;
6763 idxGstRegIndex = UINT8_MAX;
6764 }
6765
6766 /* base */
6767 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6768 if (idxGstRegBase == 4)
6769 {
6770 /* pop [rsp] hack */
6771 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
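            /* ('pop [xSP]' computes the effective address with the stack pointer value after
                the implicit increment, so the decoder passes that increment in bits 15:8 of
                uSibAndRspOffset; see the parameter description above.) */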
6772 }
6773 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6774 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6775 {
6776 /* mod=0 and base=5 -> disp32, no base reg. */
6777 Assert(i64EffAddr == 0);
6778 i64EffAddr = (int32_t)u32Disp;
6779 idxGstRegBase = UINT8_MAX;
6780 }
6781 }
6782
6783 /*
6784 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6785 * the start of the function.
6786 */
6787 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6788 {
6789 if (f64Bit)
6790 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6791 else
6792 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6793 return off;
6794 }
6795
6796 /*
6797 * Now emit code that calculates:
6798 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6799 * or if !f64Bit:
6800 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6801 */
6802 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6803 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6804 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6805 kIemNativeGstRegUse_ReadOnly);
6806 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6807 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6808 kIemNativeGstRegUse_ReadOnly);
6809
6810 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6811 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6812 {
6813 idxRegBase = idxRegIndex;
6814 idxRegIndex = UINT8_MAX;
6815 }
6816
6817#ifdef RT_ARCH_AMD64
6818 uint8_t bFinalAdj;
6819 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6820 bFinalAdj = 0; /* likely */
6821 else
6822 {
6823 /* pop [rsp] with a problematic disp32 value. Split out the
6824 RSP offset and add it separately afterwards (bFinalAdj). */
6825 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6826 Assert(idxGstRegBase == X86_GREG_xSP);
6827 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6828 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6829 Assert(bFinalAdj != 0);
6830 i64EffAddr -= bFinalAdj;
6831 Assert((int32_t)i64EffAddr == i64EffAddr);
6832 }
6833 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6834//pReNative->pInstrBuf[off++] = 0xcc;
6835
6836 if (idxRegIndex == UINT8_MAX)
6837 {
6838 if (u32EffAddr == 0)
6839 {
6840 /* mov ret, base */
6841 if (f64Bit)
6842 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6843 else
6844 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6845 }
6846 else
6847 {
6848 /* lea ret, [base + disp32] */
6849 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6850 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6851 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6852 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6853 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6854 | (f64Bit ? X86_OP_REX_W : 0);
6855 pbCodeBuf[off++] = 0x8d;
6856 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6857 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6858 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6859 else
6860 {
6861 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6862 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6863 }
6864 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6865 if (bMod == X86_MOD_MEM4)
6866 {
6867 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6868 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6869 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6870 }
6871 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6872 }
6873 }
6874 else
6875 {
6876 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6877 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6878 if (idxRegBase == UINT8_MAX)
6879 {
6880 /* lea ret, [(index64 << cShiftIndex) + disp32] */
6881 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
6882 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6883 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6884 | (f64Bit ? X86_OP_REX_W : 0);
6885 pbCodeBuf[off++] = 0x8d;
6886 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6887 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6888 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6889 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6890 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6891 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6892 }
6893 else
6894 {
6895 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6896 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6897 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6898 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6899 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6900 | (f64Bit ? X86_OP_REX_W : 0);
6901 pbCodeBuf[off++] = 0x8d;
6902 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6903 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6904 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6905 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6906 if (bMod != X86_MOD_MEM0)
6907 {
6908 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6909 if (bMod == X86_MOD_MEM4)
6910 {
6911 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6912 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6913 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6914 }
6915 }
6916 }
6917 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6918 }
6919
6920 if (!bFinalAdj)
6921 { /* likely */ }
6922 else
6923 {
6924 Assert(f64Bit);
6925 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
6926 }
6927
6928#elif defined(RT_ARCH_ARM64)
6929 if (i64EffAddr == 0)
6930 {
6931 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6932 if (idxRegIndex == UINT8_MAX)
6933 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
6934 else if (idxRegBase != UINT8_MAX)
6935 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6936 f64Bit, false /*fSetFlags*/, cShiftIndex);
6937 else
6938 {
6939 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
6940 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
6941 }
6942 }
6943 else
6944 {
6945 if (f64Bit)
6946 { /* likely */ }
6947 else
6948 i64EffAddr = (int32_t)i64EffAddr;
6949
6950 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
6951 {
6952 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6953 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
6954 }
6955 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
6956 {
6957 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6958 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
6959 }
6960 else
6961 {
6962 if (f64Bit)
6963 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
6964 else
6965 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
6966 if (idxRegBase != UINT8_MAX)
6967 {
6968 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6969 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
6970 }
6971 }
6972 if (idxRegIndex != UINT8_MAX)
6973 {
6974 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6975 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6976 f64Bit, false /*fSetFlags*/, cShiftIndex);
6977 }
6978 }
6979
6980#else
6981# error "port me"
6982#endif
6983
6984 if (idxRegIndex != UINT8_MAX)
6985 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6986 if (idxRegBase != UINT8_MAX)
6987 iemNativeRegFreeTmp(pReNative, idxRegBase);
6988 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6989 return off;
6990}
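
/*
 * Illustrative sketch only (not built): a plain C model of the address calculated by the
 * code emitted above.  Invented names; the threaded counterpart is
 * iemOpHlpCalcRmEffAddrThreadedAddr64.  The RIP-relative and 'pop [rsp]' special cases are
 * handled separately in the emitter and are not modelled here.
 */
#if 0
static uint64_t iemNativeSketchEffAddr64(uint64_t uBase, uint64_t uIndex, uint8_t cShiftIndex,
                                         int64_t iDisp, bool f64Bit)
{
    uint64_t const uEffAddr = uBase + (uIndex << cShiftIndex) + (uint64_t)iDisp;
    return f64Bit ? uEffAddr : (uint32_t)uEffAddr; /* 32-bit address size truncates the result. */
}
#endif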
6991
6992
6993/*********************************************************************************************************************************
6994* Memory fetches and stores common *
6995*********************************************************************************************************************************/
6996
6997typedef enum IEMNATIVEMITMEMOP
6998{
6999 kIemNativeEmitMemOp_Store = 0,
7000 kIemNativeEmitMemOp_Fetch,
7001 kIemNativeEmitMemOp_Fetch_Zx_U16,
7002 kIemNativeEmitMemOp_Fetch_Zx_U32,
7003 kIemNativeEmitMemOp_Fetch_Zx_U64,
7004 kIemNativeEmitMemOp_Fetch_Sx_U16,
7005 kIemNativeEmitMemOp_Fetch_Sx_U32,
7006 kIemNativeEmitMemOp_Fetch_Sx_U64
7007} IEMNATIVEMITMEMOP;
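
/* Note: the Zx/Sx variants fetch the (smaller) memory operand given by cbMem and zero- or
   sign-extend it to the width named in the enum value before it lands in the destination
   variable. */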
7008
7009/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
7010 * as well as IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
7011 * (the flat variants pass iSegReg = UINT8_MAX). */
7012DECL_INLINE_THROW(uint32_t)
7013iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
7014 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAlignMaskAndCtl, IEMNATIVEMITMEMOP enmOp,
7015 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
7016{
7017 /*
7018 * Assert sanity.
7019 */
7020 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7021 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7022 Assert( enmOp != kIemNativeEmitMemOp_Store
7023 || pVarValue->enmKind == kIemNativeVarKind_Immediate
7024 || pVarValue->enmKind == kIemNativeVarKind_Stack);
7025 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7026 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7027 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7028 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7029 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7030 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
7031#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7032 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
7033 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
7034#else
7035 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
7036#endif
7037 Assert(!(fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
7038 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7039#ifdef VBOX_STRICT
7040 if (iSegReg == UINT8_MAX)
7041 {
7042 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7043 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7044 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7045 switch (cbMem)
7046 {
7047 case 1:
7048 Assert( pfnFunction
7049 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
7050 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7051 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7052 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7053 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7054 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
7055 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
7056 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
7057 : UINT64_C(0xc000b000a0009000) ));
7058 Assert(!fAlignMaskAndCtl);
7059 break;
7060 case 2:
7061 Assert( pfnFunction
7062 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
7063 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7064 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7065 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7066 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
7067 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
7068 : UINT64_C(0xc000b000a0009000) ));
7069 Assert(fAlignMaskAndCtl <= 1);
7070 break;
7071 case 4:
7072 Assert( pfnFunction
7073 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
7074 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7075 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7076 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
7077 : UINT64_C(0xc000b000a0009000) ));
7078 Assert(fAlignMaskAndCtl <= 3);
7079 break;
7080 case 8:
7081 Assert( pfnFunction
7082 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
7083 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
7084 : UINT64_C(0xc000b000a0009000) ));
7085 Assert(fAlignMaskAndCtl <= 7);
7086 break;
7087#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7088 case sizeof(RTUINT128U):
7089 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7090 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
7091 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7092 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
7093 || ( enmOp == kIemNativeEmitMemOp_Store
7094 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7095 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
7096 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7097 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7098 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
7099 : fAlignMaskAndCtl <= 15);
7100 break;
7101 case sizeof(RTUINT256U):
7102 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7103 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
7104 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
7105 || ( enmOp == kIemNativeEmitMemOp_Store
7106 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
7107 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
7108 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
7109 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
7110 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
7111 : fAlignMaskAndCtl <= 31);
7112 break;
7113#endif
7114 }
7115 }
7116 else
7117 {
7118 Assert(iSegReg < 6);
7119 switch (cbMem)
7120 {
7121 case 1:
7122 Assert( pfnFunction
7123 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
7124 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
7125 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7126 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7127 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7128 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
7129 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
7130 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
7131 : UINT64_C(0xc000b000a0009000) ));
7132 Assert(!fAlignMaskAndCtl);
7133 break;
7134 case 2:
7135 Assert( pfnFunction
7136 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
7137 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
7138 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7139 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7140 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
7141 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
7142 : UINT64_C(0xc000b000a0009000) ));
7143 Assert(fAlignMaskAndCtl <= 1);
7144 break;
7145 case 4:
7146 Assert( pfnFunction
7147 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
7148 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
7149 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
7150 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
7151 : UINT64_C(0xc000b000a0009000) ));
7152 Assert(fAlignMaskAndCtl <= 3);
7153 break;
7154 case 8:
7155 Assert( pfnFunction
7156 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
7157 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
7158 : UINT64_C(0xc000b000a0009000) ));
7159 Assert(fAlignMaskAndCtl <= 7);
7160 break;
7161#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7162 case sizeof(RTUINT128U):
7163 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7164 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
7165 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7166 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
7167 || ( enmOp == kIemNativeEmitMemOp_Store
7168 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7169 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
7170 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7171 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7172 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
7173 : fAlignMaskAndCtl <= 15);
7174 break;
7175 case sizeof(RTUINT256U):
7176 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7177 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
7178 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
7179 || ( enmOp == kIemNativeEmitMemOp_Store
7180 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
7181 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
7182 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
7183 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
7184 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
7185 : fAlignMaskAndCtl <= 31);
7186 break;
7187#endif
7188 }
7189 }
7190#endif
7191
7192#ifdef VBOX_STRICT
7193 /*
7194 * Check that the fExec flags we've got make sense.
7195 */
7196 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7197#endif
7198
7199 /*
7200 * To keep things simple we have to commit any pending writes first as we
7201 * may end up making calls.
7202 */
7203 /** @todo we could postpone this till we make the call and reload the
7204 * registers after returning from the call. Not sure if that's sensible or
7205 * not, though. */
7206#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7207 off = iemNativeRegFlushPendingWrites(pReNative, off);
7208#else
7209 /* The program counter is treated differently for now. */
7210 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
7211#endif
7212
7213#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7214 /*
7215 * Move/spill/flush stuff out of call-volatile registers.
7216 * This is the easy way out. We could contain this to the tlb-miss branch
7217 * by saving and restoring active stuff here.
7218 */
7219 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7220#endif
7221
7222 /*
7223 * Define labels and allocate the result register (trying for the return
7224 * register if we can).
7225 */
7226 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7227#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7228 uint8_t idxRegValueFetch = UINT8_MAX;
7229
7230 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7231 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7232 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
7233 else
7234 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7235 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7236 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7237 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7238#else
7239 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7240 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7241 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7242 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7243#endif
7244 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
7245
7246#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7247 uint8_t idxRegValueStore = UINT8_MAX;
7248
7249 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7250 idxRegValueStore = !TlbState.fSkip
7251 && enmOp == kIemNativeEmitMemOp_Store
7252 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7253 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7254 : UINT8_MAX;
7255 else
7256 idxRegValueStore = !TlbState.fSkip
7257 && enmOp == kIemNativeEmitMemOp_Store
7258 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7259 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7260 : UINT8_MAX;
7261
7262#else
7263 uint8_t const idxRegValueStore = !TlbState.fSkip
7264 && enmOp == kIemNativeEmitMemOp_Store
7265 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7266 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7267 : UINT8_MAX;
7268#endif
7269 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7270 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7271 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7272 : UINT32_MAX;
7273
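    /*
     * Rough shape of what gets emitted from here on when the TLB lookup isn't skipped
     * (a sketch; the exact code depends on the configuration):
     *          jmp     TlbLookup           ; try the inline fast path first
     *      TlbMiss:
     *          <save state, load call args, call pfnFunction, restore state>
     *          jmp     TlbDone
     *      TlbLookup:
     *          <inline TLB probe, branching to TlbMiss if it fails>
     *          <inline load/store via idxRegMemResult>
     *      TlbDone:
     */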
7274 /*
7275 * Jump to the TLB lookup code.
7276 */
7277 if (!TlbState.fSkip)
7278 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7279
7280 /*
7281 * TlbMiss:
7282 *
7283 * Call helper to do the fetching.
7284     * Call helper to do the fetching or storing.
7285 */
7286 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7287
7288#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7289 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7290#else
7291 RT_NOREF(idxInstr);
7292#endif
7293
7294#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7295 if (pReNative->Core.offPc)
7296 {
7297 /*
7298 * Update the program counter but restore it at the end of the TlbMiss branch.
7299 * This should allow delaying more program counter updates for the TlbLookup and hit paths
7300 * which are hopefully much more frequent, reducing the amount of memory accesses.
7301 */
7302 /* Allocate a temporary PC register. */
7303/** @todo r=bird: This would technically need to be done up front as it's a register allocation. */
7304 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7305 kIemNativeGstRegUse_ForUpdate);
7306
7307 /* Perform the addition and store the result. */
7308 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7309 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7310# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
7311 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
7312# endif
7313
7314 /* Free and flush the PC register. */
7315 iemNativeRegFreeTmp(pReNative, idxPcReg);
7316 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7317 }
7318#endif
7319
7320#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7321 /* Save variables in volatile registers. */
7322 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7323 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
7324 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
7325 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7326#endif
7327
7328 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
7329 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
7330#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7331 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7332 {
7333 /*
7334 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
7335 *
7336         * @note A register was assigned to the variable for the TlbLookup case above and must not
7337         *       be freed here, or the value will not be synced into that register further down the
7338         *       road because the variable would no longer know it has a register assigned.
7339 *
7340 * @note For loads it is not required to sync what is in the assigned register with the stack slot
7341 * as it will be overwritten anyway.
7342 */
7343 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7344 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
7345 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
7346 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7347 }
7348 else
7349#endif
7350 if (enmOp == kIemNativeEmitMemOp_Store)
7351 {
7352 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7353 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
7354#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7355 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7356#else
7357 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7358 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7359#endif
7360 }
7361
7362 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
7363 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
7364#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7365 fVolGregMask);
7366#else
7367 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
7368#endif
7369
7370 if (iSegReg != UINT8_MAX)
7371 {
7372 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
7373 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7374 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
7375 }
7376
7377 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7378 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7379
7380 /* Done setting up parameters, make the call. */
7381 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7382
7383 /*
7384 * Put the result in the right register if this is a fetch.
7385 */
7386 if (enmOp != kIemNativeEmitMemOp_Store)
7387 {
7388#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7389 if ( cbMem == sizeof(RTUINT128U)
7390 || cbMem == sizeof(RTUINT256U))
7391 {
7392 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
7393
7394 /* Sync the value on the stack with the host register assigned to the variable. */
7395 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
7396 }
7397 else
7398#endif
7399 {
7400 Assert(idxRegValueFetch == pVarValue->idxReg);
7401 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
7402 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
7403 }
7404 }
7405
7406#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7407 /* Restore variables and guest shadow registers to volatile registers. */
7408 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7409 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7410#endif
7411
7412#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7413 if (pReNative->Core.offPc)
7414 {
7415 /*
7416 * Time to restore the program counter to its original value.
7417 */
7418 /* Allocate a temporary PC register. */
7419 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7420 kIemNativeGstRegUse_ForUpdate);
7421
7422 /* Restore the original value. */
7423 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7424 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7425
7426 /* Free and flush the PC register. */
7427 iemNativeRegFreeTmp(pReNative, idxPcReg);
7428 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7429 }
7430#endif
7431
7432#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7433 if (!TlbState.fSkip)
7434 {
7435 /* end of TlbMiss - Jump to the done label. */
7436 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7437 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7438
7439 /*
7440 * TlbLookup:
7441 */
7442 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl,
7443 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
7444 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
7445
7446 /*
7447 * Emit code to do the actual storing / fetching.
7448 */
7449 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7450# ifdef IEM_WITH_TLB_STATISTICS
7451 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7452 enmOp == kIemNativeEmitMemOp_Store
7453                                               ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
7454                                               : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
7455# endif
7456 switch (enmOp)
7457 {
7458 case kIemNativeEmitMemOp_Store:
7459 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
7460 {
7461 switch (cbMem)
7462 {
7463 case 1:
7464 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7465 break;
7466 case 2:
7467 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7468 break;
7469 case 4:
7470 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7471 break;
7472 case 8:
7473 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7474 break;
7475#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7476 case sizeof(RTUINT128U):
7477 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7478 break;
7479 case sizeof(RTUINT256U):
7480 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7481 break;
7482#endif
7483 default:
7484 AssertFailed();
7485 }
7486 }
7487 else
7488 {
7489 switch (cbMem)
7490 {
7491 case 1:
7492 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
7493 idxRegMemResult, TlbState.idxReg1);
7494 break;
7495 case 2:
7496 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7497 idxRegMemResult, TlbState.idxReg1);
7498 break;
7499 case 4:
7500 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7501 idxRegMemResult, TlbState.idxReg1);
7502 break;
7503 case 8:
7504 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
7505 idxRegMemResult, TlbState.idxReg1);
7506 break;
7507 default:
7508 AssertFailed();
7509 }
7510 }
7511 break;
7512
7513 case kIemNativeEmitMemOp_Fetch:
7514 case kIemNativeEmitMemOp_Fetch_Zx_U16:
7515 case kIemNativeEmitMemOp_Fetch_Zx_U32:
7516 case kIemNativeEmitMemOp_Fetch_Zx_U64:
7517 switch (cbMem)
7518 {
7519 case 1:
7520 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7521 break;
7522 case 2:
7523 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7524 break;
7525 case 4:
7526 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7527 break;
7528 case 8:
7529 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7530 break;
7531#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7532 case sizeof(RTUINT128U):
7533 /*
7534 * No need to sync back the register with the stack, this is done by the generic variable handling
7535 * code if there is a register assigned to a variable and the stack must be accessed.
7536 */
7537 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7538 break;
7539 case sizeof(RTUINT256U):
7540 /*
7541 * No need to sync back the register with the stack, this is done by the generic variable handling
7542 * code if there is a register assigned to a variable and the stack must be accessed.
7543 */
7544 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7545 break;
7546#endif
7547 default:
7548 AssertFailed();
7549 }
7550 break;
7551
7552 case kIemNativeEmitMemOp_Fetch_Sx_U16:
7553 Assert(cbMem == 1);
7554 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7555 break;
7556
7557 case kIemNativeEmitMemOp_Fetch_Sx_U32:
7558 Assert(cbMem == 1 || cbMem == 2);
7559 if (cbMem == 1)
7560 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7561 else
7562 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7563 break;
7564
7565 case kIemNativeEmitMemOp_Fetch_Sx_U64:
7566 switch (cbMem)
7567 {
7568 case 1:
7569 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7570 break;
7571 case 2:
7572 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7573 break;
7574 case 4:
7575 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7576 break;
7577 default:
7578 AssertFailed();
7579 }
7580 break;
7581
7582 default:
7583 AssertFailed();
7584 }
7585
7586 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7587
7588 /*
7589 * TlbDone:
7590 */
7591 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7592
7593 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7594
7595# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7596 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7597 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7598# endif
7599 }
7600#else
7601 RT_NOREF(fAlignMaskAndCtl, idxLabelTlbMiss);
7602#endif
7603
7604 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
7605 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7606 return off;
7607}
7608
7609
7610
7611/*********************************************************************************************************************************
7612* Memory fetches (IEM_MEM_FETCH_XXX). *
7613*********************************************************************************************************************************/
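/*
 * Note: All of the IEM_MC_FETCH_MEM_XXX variants below expand to a call to the
 *       common iemNativeEmitMemFetchStoreDataCommon worker, passing the
 *       destination variable, the segment register (UINT8_MAX for the flat
 *       variants), the guest address variable, the access size, the alignment
 *       mask (plus IEM_MEMMAP_F_ALIGN_XXX control flags for the aligned SSE/AVX
 *       variants), the memory operation kind, the TLB-miss helper to call and
 *       the instruction index (plus an optional displacement for _DISP).
 */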
7614
7615/* 8-bit segmented: */
7616#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
7617 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
7618 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7619 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7620
7621#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7622 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7623 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7624 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7625
7626#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7627 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7628 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7629 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7630
7631#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7632 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7633 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7634 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7635
7636#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7637 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7638 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7639 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7640
7641#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7642 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7643 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7644 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7645
7646#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7647 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7648 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7649 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7650
7651/* 16-bit segmented: */
7652#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7653 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7654 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7655 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7656
7657#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7658 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7659 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7660 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7661
7662#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7663 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7664 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7665 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7666
7667#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7668 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7669 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7670 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7671
7672#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7673 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7674 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7675 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7676
7677#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7678 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7679 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7680 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7681
7682
7683/* 32-bit segmented: */
7684#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7685 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7686 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7687 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7688
7689#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7690 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7691 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7692 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7693
7694#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7695 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7696 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7697 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7698
7699#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7700 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7701 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7702 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7703
7704#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
7705 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
7706 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7707 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7708
7709#define IEM_MC_FETCH_MEM_I16_DISP(a_i16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7710 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
7711 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7712 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7713
7714#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
7715 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7716 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7717 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7718
7719#define IEM_MC_FETCH_MEM_I32_DISP(a_i32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7720 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7721 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7722 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7723
7724#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
7725 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
7726 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7727 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7728
7729AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
7730#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
7731 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
7732 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7733 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7734
7735
7736/* 64-bit segmented: */
7737#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7738 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7739 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7740 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7741
7742AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
7743#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
7744 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
7745 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7746 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7747
7748
7749/* 8-bit flat: */
7750#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7751 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
7752 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7753 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7754
7755#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7756 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7757 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7758 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7759
7760#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7761 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7762 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7763 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7764
7765#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7766 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7767 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7768 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7769
7770#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7771 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7772 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7773 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7774
7775#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7776 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7777 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7778 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7779
7780#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7781 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7782 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7783 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7784
7785
7786/* 16-bit flat: */
7787#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7788 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7789 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7790 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7791
7792#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7793 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7794 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7795 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7796
7797#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7798 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7799 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7800 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7801
7802#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7803 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7804 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7805 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7806
7807#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7808 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7809 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7810 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7811
7812#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7813 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7814 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7815 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7816
7817/* 32-bit flat: */
7818#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7819 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7820 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7821 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7822
7823#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7824 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7825 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7826 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7827
7828#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7829 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7830 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7831 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7832
7833#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7834 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7835 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7836 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7837
7838#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7839 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7840 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7841 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7842
7843#define IEM_MC_FETCH_MEM_FLAT_I16_DISP(a_i16Dst, a_GCPtrMem, a_offDisp) \
7844 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7845 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7846 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7847
7848#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7849 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7850 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7851 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7852
7853#define IEM_MC_FETCH_MEM_FLAT_I32_DISP(a_i32Dst, a_GCPtrMem, a_offDisp) \
7854 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7855 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7856 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7857
7858#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7859 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
7860 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7861 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7862
7863#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7864 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
7865 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7866 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7867
7868
7869/* 64-bit flat: */
7870#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7871 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7872 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7873 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7874
7875#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7876 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
7877 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7878 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7879
7880#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7881/* 128-bit segmented: */
7882#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7883 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7884 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7885 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7886
7887#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7888 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7889 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7890 kIemNativeEmitMemOp_Fetch, \
7891 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7892
7893AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7894#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7895 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, sizeof(X86XMMREG), \
7896 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7897 kIemNativeEmitMemOp_Fetch, \
7898 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7899
7900#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7901 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7902 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7903 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7904
7905#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7906 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7907 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7908 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7909
7910
7911/* 128-bit flat: */
7912#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7913 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7914 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7915 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7916
7917#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7918 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7919 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7920 kIemNativeEmitMemOp_Fetch, \
7921 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7922
7923#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7924 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, sizeof(X86XMMREG), \
7925 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7926 kIemNativeEmitMemOp_Fetch, \
7927 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7928
7929#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7930 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7931 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7932 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7933
7934#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
7935 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
7936 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7937 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7938
7939/* 256-bit segmented: */
7940#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7941 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7942 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7943 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7944
7945#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7946 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7947 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7948 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7949
7950#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7951 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7952 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7953 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7954
7955#define IEM_MC_FETCH_MEM_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7956 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7957 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7958 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7959
7960
7961/* 256-bit flat: */
7962#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
7963 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7964 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7965 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7966
7967#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
7968 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7969 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7970 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7971
7972#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
7973 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
7974 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7975 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7976
7977#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
7978 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, \
7979 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7980 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7981
7982#endif
7983
7984
7985/*********************************************************************************************************************************
7986* Memory stores (IEM_MEM_STORE_XXX). *
7987*********************************************************************************************************************************/
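/*
 * Note: Like the fetch macros above, the store macros go through the common
 *       iemNativeEmitMemFetchStoreDataCommon worker, just with
 *       kIemNativeEmitMemOp_Store and the corresponding store helpers; the
 *       _CONST variants wrap this via iemNativeEmitMemStoreConstDataCommon below.
 */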
7988
7989#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
7990 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
7991 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
7992 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7993
7994#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
7995 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
7996 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7997 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7998
7999#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
8000 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
8001 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
8002 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8003
8004#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
8005 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
8006 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
8007 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8008
8009
8010#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
8011 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
8012 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
8013 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8014
8015#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
8016 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
8017 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
8018 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8019
8020#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
8021 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
8022 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
8023 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8024
8025#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
8026 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
8027 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
8028 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8029
8030
8031#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
8032 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8033 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8034
8035#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
8036 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8037 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8038
8039#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
8040 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8041 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8042
8043#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
8044 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8045 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8046
8047
8048#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
8049 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8050 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8051
8052#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
8053 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8054 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8055
8056#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
8057 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8058 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8059
8060#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
8061 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8062 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8063
8064/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
8065 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
8066DECL_INLINE_THROW(uint32_t)
8067iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
8068 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
8069{
8070 /*
8071 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
8072 * to do the grunt work.
8073 */
8074 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
8075 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
8076 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
8077 pfnFunction, idxInstr);
8078 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
8079 return off;
8080}
8081
8082
8083#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8084# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
8085 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8086 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8087 kIemNativeEmitMemOp_Store, \
8088 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
8089
8090# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
8091 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
8092 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
8093 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
8094
8095# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
8096 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
8097 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
8098 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
8099
8100# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
8101 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
8102 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8103 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8104
8105
8106# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
8107 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8108 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8109 kIemNativeEmitMemOp_Store, \
8110 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
8111
8112# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
8113 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
8114 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
8115 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
8116
8117# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
8118 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
8119 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
8120 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
8121
8122# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
8123 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
8124 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8125 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8126#endif
8127
8128
8129
8130/*********************************************************************************************************************************
8131* Stack Accesses. *
8132*********************************************************************************************************************************/
8133/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
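/* (cBitsVar = width of the value being pushed; cBitsFlat = stack pointer width
    for the flat variants, 0 when pushing onto a segmented SS stack; fSReg = set
    for segment register pushes.) */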
8134#define IEM_MC_PUSH_U16(a_u16Value) \
8135 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
8136 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
8137#define IEM_MC_PUSH_U32(a_u32Value) \
8138 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
8139 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
8140#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
8141 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
8142 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
8143#define IEM_MC_PUSH_U64(a_u64Value) \
8144 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
8145 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
8146
8147#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
8148 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
8149 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8150#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
8151 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
8152 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
8153#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
8154 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
8155 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
8156
8157#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
8158 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
8159 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8160#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
8161 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
8162 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
8163
8164
8165/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
8166DECL_INLINE_THROW(uint32_t)
8167iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
8168 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
8169{
8170 /*
8171 * Assert sanity.
8172 */
8173 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
8174 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
8175#ifdef VBOX_STRICT
8176 if (RT_BYTE2(cBitsVarAndFlat) != 0)
8177 {
8178 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8179 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8180 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8181 Assert( pfnFunction
8182 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8183 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
8184 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
8185 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8186 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
8187 : UINT64_C(0xc000b000a0009000) ));
8188 }
8189 else
8190 Assert( pfnFunction
8191 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
8192 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
8193 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
8194 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
8195 : UINT64_C(0xc000b000a0009000) ));
8196#endif
8197
8198#ifdef VBOX_STRICT
8199 /*
8200 * Check that the fExec flags we've got make sense.
8201 */
8202 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8203#endif
8204
8205 /*
8206 * To keep things simple we have to commit any pending writes first as we
8207 * may end up making calls.
8208 */
8209 /** @todo we could postpone this till we make the call and reload the
8210 * registers after returning from the call. Not sure if that's sensible or
8211 * not, though. */
8212 off = iemNativeRegFlushPendingWrites(pReNative, off);
8213
8214 /*
8215 * First we calculate the new RSP and the effective stack pointer value.
8216 * For 64-bit mode and flat 32-bit these two are the same.
8217 * (Code structure is very similar to that of PUSH)
8218 */
8219 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
8220 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
8221 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
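    /* Intel CPUs only write a word for segment register pushes unless we're in
       plain 16-bit mode, so the memory access is narrowed accordingly; the
       16-bit mode case keeps the full operand size and gets the real-mode
       EFLAGS quirk treatment in the TLB-hit code further down. */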
8222 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
8223 ? cbMem : sizeof(uint16_t);
8224 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
8225 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8226 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8227 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8228 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8229 if (cBitsFlat != 0)
8230 {
8231 Assert(idxRegEffSp == idxRegRsp);
8232 Assert(cBitsFlat == 32 || cBitsFlat == 64);
8233 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8234 if (cBitsFlat == 64)
8235 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
8236 else
8237 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
8238 }
8239 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8240 {
8241 Assert(idxRegEffSp != idxRegRsp);
8242 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8243 kIemNativeGstRegUse_ReadOnly);
8244#ifdef RT_ARCH_AMD64
8245 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8246#else
8247 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8248#endif
8249 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8250 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8251 offFixupJumpToUseOtherBitSp = off;
8252 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8253 {
8254 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8255 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8256 }
8257 else
8258 {
8259 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8260 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8261 }
8262 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8263 }
8264 /* SpUpdateEnd: */
8265 uint32_t const offLabelSpUpdateEnd = off;
8266
8267 /*
8268      * Okay, now prepare for the TLB lookup and jump to the TlbLookup code
8269      * (or to TlbMiss if we're skipping the lookup).
8270 */
8271 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8272 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
8273 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8274 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8275 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8276 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8277 : UINT32_MAX;
8278 uint8_t const idxRegValue = !TlbState.fSkip
8279 && pVarValue->enmKind != kIemNativeVarKind_Immediate
8280 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
8281 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
8282 : UINT8_MAX;
8283 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
8284
8285
8286 if (!TlbState.fSkip)
8287 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8288 else
8289 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8290
8291 /*
8292 * Use16BitSp:
8293 */
8294 if (cBitsFlat == 0)
8295 {
8296#ifdef RT_ARCH_AMD64
8297 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8298#else
8299 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8300#endif
8301 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8302 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8303 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8304 else
8305 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8306 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8307 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8308 }
8309
8310 /*
8311 * TlbMiss:
8312 *
8313 * Call helper to do the pushing.
8314 */
8315 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8316
8317#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8318 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8319#else
8320 RT_NOREF(idxInstr);
8321#endif
8322
8323 /* Save variables in volatile registers. */
8324 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8325 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8326 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
8327 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
8328 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8329
8330 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
8331 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
8332 {
8333 /* Swap them using ARG0 as temp register: */
8334 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
8335 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
8336 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
8337 }
8338 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
8339 {
8340 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
8341 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
8342 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8343
8344 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
8345 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8346 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8347 }
8348 else
8349 {
8350 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
8351 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8352
8353 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
8354 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
8355 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
8356 }
8357
8358 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8359 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8360
8361 /* Done setting up parameters, make the call. */
8362 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8363
8364 /* Restore variables and guest shadow registers to volatile registers. */
8365 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8366 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8367
8368#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8369 if (!TlbState.fSkip)
8370 {
8371 /* end of TlbMiss - Jump to the done label. */
8372 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8373 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8374
8375 /*
8376 * TlbLookup:
8377 */
8378 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
8379 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8380
8381 /*
8382      * Emit code to do the actual storing.
8383 */
8384 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
8385# ifdef IEM_WITH_TLB_STATISTICS
8386 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8387 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8388# endif
8389 if (idxRegValue != UINT8_MAX)
8390 {
8391 switch (cbMemAccess)
8392 {
8393 case 2:
8394 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8395 break;
8396 case 4:
8397 if (!fIsIntelSeg)
8398 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8399 else
8400 {
8401                        /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
8402                           PUSH FS in real mode, so we have to try to emulate that here.
8403 We borrow the now unused idxReg1 from the TLB lookup code here. */
8404 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
8405 kIemNativeGstReg_EFlags);
8406 if (idxRegEfl != UINT8_MAX)
8407 {
8408#ifdef RT_ARCH_AMD64
8409 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
8410 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8411 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8412#else
8413 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
8414 off, TlbState.idxReg1, idxRegEfl,
8415 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8416#endif
8417 iemNativeRegFreeTmp(pReNative, idxRegEfl);
8418 }
8419 else
8420 {
8421 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
8422 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
8423 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8424 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8425 }
8426 /* ASSUMES the upper half of idxRegValue is ZERO. */
8427 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
8428 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
8429 }
8430 break;
8431 case 8:
8432 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8433 break;
8434 default:
8435 AssertFailed();
8436 }
8437 }
8438 else
8439 {
8440 switch (cbMemAccess)
8441 {
8442 case 2:
8443 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
8444 idxRegMemResult, TlbState.idxReg1);
8445 break;
8446 case 4:
8447 Assert(!fIsSegReg);
8448 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
8449 idxRegMemResult, TlbState.idxReg1);
8450 break;
8451 case 8:
8452 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
8453 break;
8454 default:
8455 AssertFailed();
8456 }
8457 }
8458
8459 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8460 TlbState.freeRegsAndReleaseVars(pReNative);
8461
8462 /*
8463 * TlbDone:
8464 *
8465 * Commit the new RSP value.
8466 */
8467 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8468 }
8469#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8470
8471#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8472 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
8473#endif
8474 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8475 if (idxRegEffSp != idxRegRsp)
8476 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8477
8478     /* The value variable is implicitly flushed. */
8479 if (idxRegValue != UINT8_MAX)
8480 iemNativeVarRegisterRelease(pReNative, idxVarValue);
8481 iemNativeVarFreeLocal(pReNative, idxVarValue);
8482
8483 return off;
8484}
8485
8486
8487
8488/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
8489#define IEM_MC_POP_GREG_U16(a_iGReg) \
8490 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
8491 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
8492#define IEM_MC_POP_GREG_U32(a_iGReg) \
8493 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
8494 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
8495#define IEM_MC_POP_GREG_U64(a_iGReg) \
8496 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
8497 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
8498
8499#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
8500 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
8501 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8502#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
8503 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
8504 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
8505
8506#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
8507 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
8508 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8509#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
8510 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
8511 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
8512
8513
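/** Emits the 16-bit SP variant of the stack-pointer update for POP: copies SP
 * into idxRegEffSp as the (zero extended) effective address to pop from and
 * adds cbMem to SP while leaving the upper bits of RSP untouched. The
 * idxRegTmp scratch register is only needed on ARM64. */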
8514DECL_FORCE_INLINE_THROW(uint32_t)
8515iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
8516 uint8_t idxRegTmp)
8517{
8518 /* Use16BitSp: */
8519#ifdef RT_ARCH_AMD64
8520 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8521 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
8522 RT_NOREF(idxRegTmp);
8523#else
8524 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
8525 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
8526 /* add tmp, regrsp, #cbMem */
8527 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
8528 /* and tmp, tmp, #0xffff */
8529 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
8530 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
8531     /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
8532 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
8533#endif
8534 return off;
8535}
8536
8537
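/** Emits the 32-bit ESP variant of the stack-pointer update for POP: copies
 * ESP into idxRegEffSp as the effective address to pop from and adds cbMem to
 * the 32-bit stack pointer. */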
8538DECL_FORCE_INLINE(uint32_t)
8539iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
8540{
8541 /* Use32BitSp: */
8542 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8543 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
8544 return off;
8545}
8546
8547
8548/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
8549DECL_INLINE_THROW(uint32_t)
8550iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
8551 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
8552{
8553 /*
8554 * Assert sanity.
8555 */
8556 Assert(idxGReg < 16);
8557#ifdef VBOX_STRICT
8558 if (RT_BYTE2(cBitsVarAndFlat) != 0)
8559 {
8560 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8561 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8562 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8563 Assert( pfnFunction
8564 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8565 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
8566 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8567 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
8568 : UINT64_C(0xc000b000a0009000) ));
8569 }
8570 else
8571 Assert( pfnFunction
8572 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
8573 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
8574 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
8575 : UINT64_C(0xc000b000a0009000) ));
8576#endif
8577
8578#ifdef VBOX_STRICT
8579 /*
8580 * Check that the fExec flags we've got make sense.
8581 */
8582 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8583#endif
8584
8585 /*
8586 * To keep things simple we have to commit any pending writes first as we
8587 * may end up making calls.
8588 */
8589 off = iemNativeRegFlushPendingWrites(pReNative, off);
8590
8591 /*
8592 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
8593 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
8594 * directly as the effective stack pointer.
8595 * (Code structure is very similar to that of PUSH)
8596 */
8597 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
8598 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
8599 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8600 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8601 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8602 /** @todo can do a better job picking the register here. For cbMem >= 4 this
8603 * will be the resulting register value. */
8604 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
8605
8606 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8607 if (cBitsFlat != 0)
8608 {
8609 Assert(idxRegEffSp == idxRegRsp);
8610 Assert(cBitsFlat == 32 || cBitsFlat == 64);
8611 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8612 }
8613 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8614 {
8615 Assert(idxRegEffSp != idxRegRsp);
8616 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8617 kIemNativeGstRegUse_ReadOnly);
8618#ifdef RT_ARCH_AMD64
8619 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8620#else
8621 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8622#endif
8623 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8624 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8625 offFixupJumpToUseOtherBitSp = off;
8626 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8627 {
8628/** @todo can skip idxRegRsp updating when popping ESP. */
8629 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8630 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8631 }
8632 else
8633 {
8634 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8635 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8636 }
8637 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8638 }
8639 /* SpUpdateEnd: */
8640 uint32_t const offLabelSpUpdateEnd = off;
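    /* Rough guest-level sketch of the stack-pointer handling the Use16Sp/Use32Sp emitters above
       implement for a pop (illustrative locals, not compiled; the real emitters produce native
       code with the equivalent effect). */
#if 0
    uint64_t uEffSp;
    if (f16BitStack)
    {   /* 16-bit stacks only use and update the low word of [RE]SP. */
        uEffSp = uRsp & UINT16_MAX;
        uRsp   = (uRsp & ~(uint64_t)UINT16_MAX) | (uint16_t)(uEffSp + cbMem);
    }
    else
    {   /* 32-bit and flat 64-bit stacks simply advance the pointer (only ESP is
           architecturally relevant in 32-bit modes). */
        uEffSp = uRsp;
        uRsp  += cbMem;
    }
    /* uEffSp is the address the popped value is read from. */
#endif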
8641
8642 /*
8643 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8644 * we're skipping lookup).
8645 */
8646 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8647 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
8648 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8649 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8650 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8651 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8652 : UINT32_MAX;
8653
8654 if (!TlbState.fSkip)
8655 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8656 else
8657 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
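    /*
     * At this point the emitted code has, roughly, the following shape (a simplified view of the
     * labels created above; the TlbLookup body itself is emitted further down by
     * iemNativeEmitTlbLookup):
     *
     *          [SS attribute test + SP update]     ; non-FLAT only
     *      SpUpdateEnd:
     *          jmp     TlbLookup                   ; or TlbMiss when the lookup is skipped
     *      UseOtherBitSp:                          ; the "other" SP width, target of the test above
     *          [SP update], jmp SpUpdateEnd
     *      TlbMiss:
     *          call    pfnFunction                 ; helper performs the stack fetch
     *          jmp     TlbDone
     *      TlbLookup:
     *          [inline TLB lookup + load of the value]
     *      TlbDone:
     *          [commit the popped value and the new RSP]
     */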
8658
8659 /*
8660 * Use16BitSp:
8661 */
8662 if (cBitsFlat == 0)
8663 {
8664#ifdef RT_ARCH_AMD64
8665 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8666#else
8667 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8668#endif
8669 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8670 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8671 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8672 else
8673 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8674 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8675 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8676 }
8677
8678 /*
8679 * TlbMiss:
8680 *
8681 * Call helper to do the popping.
8682 */
8683 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8684
8685#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8686 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8687#else
8688 RT_NOREF(idxInstr);
8689#endif
8690
8691 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8692 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8693 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
8694 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8695
8696
8697 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
8698 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8699 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8700
8701 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8702 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8703
8704 /* Done setting up parameters, make the call. */
8705 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8706
8707 /* Move the return register content to idxRegMemResult. */
8708 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8709 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8710
8711 /* Restore variables and guest shadow registers to volatile registers. */
8712 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8713 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8714
8715#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8716 if (!TlbState.fSkip)
8717 {
8718 /* end of TlbMiss - Jump to the done label. */
8719 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8720 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8721
8722 /*
8723 * TlbLookup:
8724 */
8725 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
8726 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8727
8728 /*
8729 * Emit code to load the value (dereference the address in idxRegMemResult, storing the value back into idxRegMemResult).
8730 */
8731 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8732# ifdef IEM_WITH_TLB_STATISTICS
8733 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8734 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8735# endif
8736 switch (cbMem)
8737 {
8738 case 2:
8739 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8740 break;
8741 case 4:
8742 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8743 break;
8744 case 8:
8745 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8746 break;
8747 default:
8748 AssertFailed();
8749 }
8750
8751 TlbState.freeRegsAndReleaseVars(pReNative);
8752
8753 /*
8754 * TlbDone:
8755 *
8756 * Set the new RSP value (FLAT accesses need to calculate it first) and
8757 * commit the popped register value.
8758 */
8759 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8760 }
8761#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8762
8763 if (idxGReg != X86_GREG_xSP)
8764 {
8765 /* Set the register. */
8766 if (cbMem >= sizeof(uint32_t))
8767 {
8768#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8769 AssertMsg( pReNative->idxCurCall == 0
8770 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8771 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8772 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8773#endif
8774 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8775#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8776 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8777#endif
8778#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8779 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8780 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8781#endif
8782 }
8783 else
8784 {
8785 Assert(cbMem == sizeof(uint16_t));
8786 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8787 kIemNativeGstRegUse_ForUpdate);
8788 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8789#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8790 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8791#endif
8792 iemNativeRegFreeTmp(pReNative, idxRegDst);
8793 }
8794
8795 /* Complete RSP calculation for FLAT mode. */
8796 if (idxRegEffSp == idxRegRsp)
8797 {
8798 if (cBitsFlat == 64)
8799 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8800 else
8801 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8802 }
8803 }
8804 else
8805 {
8806 /* We're popping RSP, ESP or SP. Only the last one needs a bit of extra work, of course. */
8807 if (cbMem == sizeof(uint64_t))
8808 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8809 else if (cbMem == sizeof(uint32_t))
8810 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8811 else
8812 {
8813 if (idxRegEffSp == idxRegRsp)
8814 {
8815 if (cBitsFlat == 64)
8816 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8817 else
8818 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8819 }
8820 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8821 }
8822 }
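    /* Illustrative C equivalent of the xSP special case above (not compiled; mirrors the x86 rule
       that a pop into the stack pointer makes the popped value the new [RE]SP, with only the low
       word replaced for 16-bit operands; non-FLAT modes already advanced RSP earlier). */
#if 0
    if (cbMem == sizeof(uint64_t))
        uRsp = uPopped;                                                 /* POP RSP */
    else if (cbMem == sizeof(uint32_t))
        uRsp = (uint32_t)uPopped;                                       /* POP ESP, zero extended */
    else
    {
        if (fFlat)                                                      /* apply the deferred...   */
            uRsp += cbMem;                                              /* ...FLAT-mode increment, */
        uRsp = (uRsp & ~(uint64_t)UINT16_MAX) | (uint16_t)uPopped;      /* then replace SP only.   */
    }
#endif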
8823
8824#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8825 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
8826#endif
8827
8828 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8829 if (idxRegEffSp != idxRegRsp)
8830 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8831 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8832
8833 return off;
8834}
8835
8836
8837
8838/*********************************************************************************************************************************
8839* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8840*********************************************************************************************************************************/
8841
8842#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8843 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8844 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8845 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8846
8847#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8848 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8849 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8850 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8851
8852#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8853 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8854 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8855 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8856
8857#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8858 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8859 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8860 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8861
8862
8863#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8864 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8865 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8866 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8867
8868#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8869 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8870 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8871 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8872
8873#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8874 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8875 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8876 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8877
8878#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8879 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8880 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8881 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8882
8883#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8884 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
8885 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8886 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8887
8888
8889#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8890 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8891 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8892 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8893
8894#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8895 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8896 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8897 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8898
8899#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8900 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8901 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8902 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8903
8904#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8905 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8906 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8907 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8908
8909#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8910 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
8911 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8912 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8913
8914
8915#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8916 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8917 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8918 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8919
8920#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8921 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8922 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8923 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8924#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8925 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8926 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8927 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8928
8929#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8930 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8931 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8932 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
8933
8934#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8935 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
8936 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8937 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8938
8939
8940#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8941 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8942 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8943 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
8944
8945#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8946 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8947 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8948 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
8949
8950
8951#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8952 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8953 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8954 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
8955
8956#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8957 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8958 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8959 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
8960
8961#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8962 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8963 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8964 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
8965
8966#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8967 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8968 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8969 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
8970
8971
8972
8973#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8974 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8975 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8976 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
8977
8978#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8979 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8980 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8981 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
8982
8983#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8984 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8985 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8986 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
8987
8988#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8989 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8990 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8991 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
8992
8993
8994#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8995 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8996 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8997 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
8998
8999#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9000 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9001 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9002 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
9003
9004#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9005 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9006 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9007 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9008
9009#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9010 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9011 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9012 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
9013
9014#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
9015 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
9016 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9017 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9018
9019
9020#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9021 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9022 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9023 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
9024
9025#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9026 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9027 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9028 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
9029
9030#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9031 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9032 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9033 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9034
9035#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9036 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9037 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9038 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
9039
9040#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
9041 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
9042 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9043 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9044
9045
9046#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9047 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9048 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9049 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
9050
9051#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9052 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9053 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9054 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
9055
9056#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9057 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9058 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9059 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9060
9061#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9062 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9063 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9064 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
9065
9066#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
9067 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
9068 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9069 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9070
9071
9072#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
9073 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
9074 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9075 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
9076
9077#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
9078 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
9079 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
9080 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
9081
9082
9083#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9084 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9085 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9086 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
9087
9088#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9089 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9090 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9091 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
9092
9093#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9094 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9095 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9096 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
9097
9098#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9099 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9100 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9101 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
9102
9103
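/* Expansion example (illustrative, not compiled; pu32Dst/bUnmapInfo/GCPtrEff stand in for the MC
   variable indices normally passed in): a FLAT 32-bit read-write mapping becomes a single call to
   the common emitter below, with fAlignMaskAndCtl = 3 being the alignment mask handed to the
   TLB-lookup code. */
#if 0
    off = iemNativeEmitMemMapCommon(pReNative, off, pu32Dst, bUnmapInfo, UINT8_MAX /*iSegReg*/, GCPtrEff,
                                    sizeof(uint32_t), IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/,
                                    (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr);
#endif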
9104DECL_INLINE_THROW(uint32_t)
9105iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
9106 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint32_t fAlignMaskAndCtl,
9107 uintptr_t pfnFunction, uint8_t idxInstr)
9108{
9109 /*
9110 * Assert sanity.
9111 */
9112 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
9113 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
9114 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
9115 && pVarMem->cbVar == sizeof(void *),
9116 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9117
9118 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9119 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9120 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
9121 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
9122 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9123
9124 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
9125 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
9126 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
9127 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
9128 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9129
9130 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
9131
9132 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
9133
9134#ifdef VBOX_STRICT
9135# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
9136 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
9137 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
9138 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
9139 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
9140# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
9141 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
9142 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
9143 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
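    /* Resolution example for the helper-picking macro above: with fAccess = IEM_ACCESS_DATA_RW
       (read + write, no atomic bit) and a_fnBase = iemNativeHlpMemFlatMapDataU32, IEM_MAP_HLP_FN
       yields (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw; adding IEM_ACCESS_ATOMIC selects the
       ...Atomic variant, while pure read or pure write select the ...Ro / ...Wo variants. */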
9144
9145 if (iSegReg == UINT8_MAX)
9146 {
9147 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
9148 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
9149 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
9150 switch (cbMem)
9151 {
9152 case 1:
9153 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8));
9154 Assert(!fAlignMaskAndCtl);
9155 break;
9156 case 2:
9157 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16));
9158 Assert(fAlignMaskAndCtl < 2);
9159 break;
9160 case 4:
9161 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32));
9162 Assert(fAlignMaskAndCtl < 4);
9163 break;
9164 case 8:
9165 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64));
9166 Assert(fAlignMaskAndCtl < 8);
9167 break;
9168 case 10:
9169 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
9170 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
9171 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9172 Assert(fAlignMaskAndCtl < 8);
9173 break;
9174 case 16:
9175 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128));
9176 Assert(fAlignMaskAndCtl < 16);
9177 break;
9178# if 0
9179 case 32:
9180 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256));
9181 Assert(fAlignMaskAndCtl < 32);
9182 break;
9183 case 64:
9184 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512));
9185 Assert(fAlignMaskAndCtl < 64);
9186 break;
9187# endif
9188 default: AssertFailed(); break;
9189 }
9190 }
9191 else
9192 {
9193 Assert(iSegReg < 6);
9194 switch (cbMem)
9195 {
9196 case 1:
9197 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8));
9198 Assert(!fAlignMaskAndCtl);
9199 break;
9200 case 2:
9201 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16));
9202 Assert(fAlignMaskAndCtl < 2);
9203 break;
9204 case 4:
9205 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32));
9206 Assert(fAlignMaskAndCtl < 4);
9207 break;
9208 case 8:
9209 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64));
9210 Assert(fAlignMaskAndCtl < 8);
9211 break;
9212 case 10:
9213 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
9214 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
9215 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9216 Assert(fAlignMaskAndCtl < 8);
9217 break;
9218 case 16:
9219 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128));
9220 Assert(fAlignMaskAndCtl < 16);
9221 break;
9222# if 0
9223 case 32:
9224 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256));
9225 Assert(fAlignMaskAndCtl < 32);
9226 break;
9227 case 64:
9228 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512));
9229 Assert(fAlignMaskAndCtl < 64);
9230 break;
9231# endif
9232 default: AssertFailed(); break;
9233 }
9234 }
9235# undef IEM_MAP_HLP_FN
9236# undef IEM_MAP_HLP_FN_NO_AT
9237#endif
9238
9239#ifdef VBOX_STRICT
9240 /*
9241 * Check that the fExec flags we've got make sense.
9242 */
9243 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
9244#endif
9245
9246 /*
9247 * To keep things simple we have to commit any pending writes first as we
9248 * may end up making calls.
9249 */
9250 off = iemNativeRegFlushPendingWrites(pReNative, off);
9251
9252#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9253 /*
9254 * Move/spill/flush stuff out of call-volatile registers.
9255 * This is the easy way out. We could contain this to the tlb-miss branch
9256 * by saving and restoring active stuff here.
9257 */
9258 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
9259 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9260#endif
9261
9262 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
9263 while the tlb-miss codepath will temporarily put it on the stack.
9264 Set the type to stack here so we don't need to do it twice below. */
9265 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
9266 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
9267 /** @todo use a tmp register from TlbState, since they'll be free after tlb
9268 * lookup is done. */
9269
9270 /*
9271 * Define labels and allocate the result register (trying for the return
9272 * register if we can).
9273 */
9274 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
9275 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
9276 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
9277 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
9278 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
9279 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
9280 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
9281 : UINT32_MAX;
9282
9283 /*
9284 * Jump to the TLB lookup code.
9285 */
9286 if (!TlbState.fSkip)
9287 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
9288
9289 /*
9290 * TlbMiss:
9291 *
9292 * Call helper to do the fetching.
9293 * We flush all guest register shadow copies here.
9294 */
9295 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
9296
9297#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
9298 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9299#else
9300 RT_NOREF(idxInstr);
9301#endif
9302
9303#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9304 /* Save variables in volatile registers. */
9305 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
9306 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
9307#endif
9308
9309 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
9310 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
9311#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9312 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
9313#else
9314 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9315#endif
9316
9317 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
9318 if (iSegReg != UINT8_MAX)
9319 {
9320 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
9321 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
9322 }
9323
9324 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
9325 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
9326 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
9327
9328 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9329 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9330
9331 /* Done setting up parameters, make the call. */
9332 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
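    /* For reference, the argument setup above implies a helper shape along the lines of
           pvMem = pfnFunction(pVCpu, &bUnmapInfo, GCPtrMem[, iSegReg]);
       (an inference from this emitter, not the authoritative prototype); the returned host
       pointer is moved into idxRegMemResult just below. */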
9333
9334 /*
9335 * Put the output in the right registers.
9336 */
9337 Assert(idxRegMemResult == pVarMem->idxReg);
9338 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
9339 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
9340
9341#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9342 /* Restore variables and guest shadow registers to volatile registers. */
9343 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
9344 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
9345#endif
9346
9347 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
9348 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
9349
9350#ifdef IEMNATIVE_WITH_TLB_LOOKUP
9351 if (!TlbState.fSkip)
9352 {
9353 /* end of TlbMiss - Jump to the done label. */
9354 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
9355 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
9356
9357 /*
9358 * TlbLookup:
9359 */
9360 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl, fAccess,
9361 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
9362# ifdef IEM_WITH_TLB_STATISTICS
9363 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
9364 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
9365# endif
9366
9367 /* [idxVarUnmapInfo] = 0; */
9368 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
9369
9370 /*
9371 * TlbDone:
9372 */
9373 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
9374
9375 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
9376
9377# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9378 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
9379 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9380# endif
9381 }
9382#else
9383 RT_NOREF(fAccess, fAlignMaskAndCtl, idxLabelTlbMiss);
9384#endif
9385
9386 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9387 iemNativeVarRegisterRelease(pReNative, idxVarMem);
9388
9389 return off;
9390}
9391
9392
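/* Typical usage sketch (illustrative, not compiled; variable names are placeholders): a microcode
   block pairs one of the IEM_MC_MEM_MAP_* statements above with the matching
   IEM_MC_MEM_COMMIT_AND_UNMAP_* statement below, operating on the mapped host pointer in between. */
#if 0
    IEM_MC_MEM_MAP_U32_RW(pu32Dst, bUnmapInfo, iEffSeg, GCPtrEffDst);
    /* ... invoke a worker that updates *pu32Dst here ... */
    IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
#endif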
9393#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
9394 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
9395 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
9396
9397#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
9398 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
9399 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
9400
9401#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
9402 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
9403 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
9404
9405#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
9406 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
9407 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
9408
9409DECL_INLINE_THROW(uint32_t)
9410iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
9411 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
9412{
9413 /*
9414 * Assert sanity.
9415 */
9416 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9417#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
9418 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9419#endif
9420 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
9421 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
9422 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
9423#ifdef VBOX_STRICT
9424 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
9425 {
9426 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
9427 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
9428 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
9429 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
9430 case IEM_ACCESS_TYPE_WRITE:
9431 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
9432 case IEM_ACCESS_TYPE_READ:
9433 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
9434 default: AssertFailed();
9435 }
9436#else
9437 RT_NOREF(fAccess);
9438#endif
9439
9440 /*
9441 * To keep things simple we have to commit any pending writes first as we
9442 * may end up making calls (there shouldn't be any at this point, so this
9443 * is just for consistency).
9444 */
9445 /** @todo we could postpone this till we make the call and reload the
9446 * registers after returning from the call. Not sure if that's sensible or
9447 * not, though. */
9448 off = iemNativeRegFlushPendingWrites(pReNative, off);
9449
9450 /*
9451 * Move/spill/flush stuff out of call-volatile registers.
9452 *
9453 * We exclude any register holding the bUnmapInfo variable, as we'll be
9454 * checking it below before deciding whether to make the call, and freeing it afterwards.
9455 */
9456 /** @todo save+restore active registers and maybe guest shadows in miss
9457 * scenario. */
9458 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
9459 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
9460
9461 /*
9462 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
9463 * to call the unmap helper function.
9464 *
9465 * The likelihood of it being zero is higher than for the TLB hit when doing
9466 * the mapping, as a TLB miss for a well aligned and unproblematic memory
9467 * access should also end up with a mapping that won't need special unmapping.
9468 */
9469 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
9470 * should speed up things for the pure interpreter as well when TLBs
9471 * are enabled. */
9472#ifdef RT_ARCH_AMD64
9473 if (pVarUnmapInfo->idxReg == UINT8_MAX)
9474 {
9475 /* test byte [rbp - xxx], 0ffh */
9476 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9477 pbCodeBuf[off++] = 0xf6;
9478 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
9479 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9480 pbCodeBuf[off++] = 0xff;
9481 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9482 }
9483 else
9484#endif
9485 {
9486 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
9487 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
9488 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
9489 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9490 }
9491 uint32_t const offJmpFixup = off;
9492 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
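    /* Conceptual equivalent of the test/jz above plus the call emitted below (illustrative, not
       compiled): the helper is only invoked when bUnmapInfo is non-zero, i.e. when the mapping
       actually needs committing/unmapping (the TLB-hit path in the map emitter stores zero). */
#if 0
    if (bUnmapInfo != 0)
        pfnFunction(pVCpu, bUnmapInfo);     /* e.g. iemNativeHlpMemCommitAndUnmapRw */
#endif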
9493
9494 /*
9495 * Call the unmap helper function.
9496 */
9497#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
9498 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9499#else
9500 RT_NOREF(idxInstr);
9501#endif
9502
9503 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
9504 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
9505 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9506
9507 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9508 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9509
9510 /* Done setting up parameters, make the call. */
9511 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
9512
9513 /* The bUnmapInfo variable is implicitly freed by these MCs. */
9514 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
9515
9516 /*
9517 * Done, just fixup the jump for the non-call case.
9518 */
9519 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
9520
9521 return off;
9522}
9523
9524
9525
9526/*********************************************************************************************************************************
9527* State and Exceptions *
9528*********************************************************************************************************************************/
9529
9530#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9531#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9532
9533#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9534#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9535#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9536
9537#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9538#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9539#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9540
9541
9542DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
9543{
9544#ifndef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
9545 RT_NOREF(pReNative, fForChange);
9546#else
9547 if ( !(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED)
9548 && fForChange)
9549 {
9550# ifdef RT_ARCH_AMD64
9551
9552 /* Need to save the host MXCSR the first time, and clear the exception flags. */
9553 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9554 {
9555 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9556
9557 /* stmxcsr */
9558 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9559 pbCodeBuf[off++] = X86_OP_REX_B;
9560 pbCodeBuf[off++] = 0x0f;
9561 pbCodeBuf[off++] = 0xae;
9562 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9563 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9564 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9565 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9566 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9567 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9568
9569 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9570 }
9571
9572 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
9573 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
9574
9575 /*
9576 * Mask all exceptions, clear the exception status flags, and load the result into MXCSR,
9577 * taking a detour through memory here because ldmxcsr/stmxcsr don't support
9578 * a register source/target (sigh).
9579 */
9580 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr);
9581 off = iemNativeEmitOrGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, X86_MXCSR_XCPT_MASK);
9582 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, ~X86_MXCSR_XCPT_FLAGS);
9583 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9584
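    /* In C terms the sequence above computes (illustrative, not compiled):
           uMxCsrTmp = (uGstMxCsr | X86_MXCSR_XCPT_MASK) & ~X86_MXCSR_XCPT_FLAGS;
       i.e. all exceptions masked and any pending exception flags cleared before the value is
       loaded into the host MXCSR below. */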
9585 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9586
9587 /* ldmxcsr */
9588 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9589 pbCodeBuf[off++] = X86_OP_REX_B;
9590 pbCodeBuf[off++] = 0x0f;
9591 pbCodeBuf[off++] = 0xae;
9592 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9593 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9594 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9595 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9596 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9597 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9598
9599 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9600 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9601
9602# elif defined(RT_ARCH_ARM64)
9603 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
9604
9605 /* Need to save the host floating point control register the first time, clear FPSR. */
9606 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9607 {
9608 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9609 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);
9610 pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9611 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9612 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9613 }
9614
9615 /*
9616 * Translate MXCSR to FPCR.
9617 *
9618 * Unfortunately we can't emulate the exact behavior of MXCSR as we can't take
9619 * FEAT_AFP on arm64 for granted (my M2 MacBook doesn't have it). So we can't map
9620 * MXCSR.DAZ to FPCR.FIZ and MXCSR.FZ to FPCR.FZ with FPCR.AH being set.
9621 * We can only use FPCR.FZ which will flush inputs _and_ output de-normals to zero.
9622 */
9623 /** @todo Check the host supported flags (needs additional work to get the host features from CPUM)
9624 * and implement alternate handling if FEAT_AFP is present. */
9625 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
9626
9627 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
9628
9629 /* First make sure that there is nothing set for the upper 16-bits (X86_MXCSR_MM, which we don't emulate right now). */
9630 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegTmp, idxRegMxCsr);
9631
9632 /* If either MXCSR.FZ or MXCSR.DAZ is set FPCR.FZ will be set. */
9633 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(IEMNATIVE_REG_FIXED_TMP0, idxRegTmp, X86_MXCSR_DAZ_BIT, 1);
9634 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, X86_MXCSR_FZ_BIT);
9635 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(idxRegTmp, idxRegTmp, IEMNATIVE_REG_FIXED_TMP0);
9636 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegTmp, idxRegTmp, ARMV8_FPCR_FZ_BIT);
9637
9638 /*
9639 * Init the rounding mode, the layout differs between MXCSR.RM[14:13] and FPCR.RMode[23:22]:
9640 *
9641 * Value MXCSR FPCR
9642 * 0 RN RN
9643 * 1 R- R+
9644 * 2 R+ R-
9645 * 3 RZ RZ
9646 *
9647 * Conversion can be achieved by switching bit positions
9648 */
9649 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT);
9650 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 14, 1);
9651 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT + 1);
9652 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 13, 1);
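    /* In effect the two LSR+BFI pairs above move the 2-bit MXCSR.RC field into the FPCR
       rounding-mode field with its bits swapped, which is exactly the conversion listed in the
       table above (illustrative C, not compiled):
           uRc    = (uGstMxCsr >> X86_MXCSR_RC_SHIFT) & 3;   - 0=RN, 1=R-, 2=R+, 3=RZ
           uRMode = ((uRc & 1) << 1) | (uRc >> 1);           - swapped bit order for FPCR
     */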
9653
9654 /* Write the value to FPCR. */
9655 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9656
9657 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9658 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9659 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9660# else
9661# error "Port me"
9662# endif
9663 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
9664 }
9665#endif
9666 return off;
9667}
9668
9669
9670
9671/*********************************************************************************************************************************
9672* Emitters for FPU related operations. *
9673*********************************************************************************************************************************/
9674
9675#define IEM_MC_FETCH_FCW(a_u16Fcw) \
9676 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
9677
9678/** Emits code for IEM_MC_FETCH_FCW. */
9679DECL_INLINE_THROW(uint32_t)
9680iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9681{
9682 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9683 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9684
9685 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9686
9687 /* Allocate a temporary FCW register. */
9688 /** @todo eliminate extra register */
9689 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
9690 kIemNativeGstRegUse_ReadOnly);
9691
9692 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
9693
9694 /* Free but don't flush the FCW register. */
9695 iemNativeRegFreeTmp(pReNative, idxFcwReg);
9696 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9697
9698 return off;
9699}
9700
9701
9702#define IEM_MC_FETCH_FSW(a_u16Fsw) \
9703 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
9704
9705/** Emits code for IEM_MC_FETCH_FSW. */
9706DECL_INLINE_THROW(uint32_t)
9707iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9708{
9709 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9710 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9711
9712 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
9713 /* Allocate a temporary FSW register. */
9714 /** @todo eliminate extra register */
9715 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
9716 kIemNativeGstRegUse_ReadOnly);
9717
9718 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
9719
9720 /* Free but don't flush the FSW register. */
9721 iemNativeRegFreeTmp(pReNative, idxFswReg);
9722 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9723
9724 return off;
9725}
9726
9727
9728
9729#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9730
9731
9732/*********************************************************************************************************************************
9733* Emitters for SSE/AVX specific operations. *
9734*********************************************************************************************************************************/
9735
9736#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
9737 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
9738
9739/** Emits code for IEM_MC_COPY_XREG_U128. */
9740DECL_INLINE_THROW(uint32_t)
9741iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
9742{
9743 /* This is a nop if the source and destination registers are the same. */
9744 if (iXRegDst != iXRegSrc)
9745 {
9746 /* Allocate destination and source register. */
9747 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
9748 kIemNativeGstSimdRegLdStSz_Low128,
9749 kIemNativeGstRegUse_ForFullWrite);
9750 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
9751 kIemNativeGstSimdRegLdStSz_Low128,
9752 kIemNativeGstRegUse_ReadOnly);
9753
9754 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9755
9756 /* Free but don't flush the source and destination register. */
9757 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9758 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9759 }
9760
9761 return off;
9762}
9763
9764
9765#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
9766 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
9767
9768/** Emits code for IEM_MC_FETCH_XREG_U128. */
9769DECL_INLINE_THROW(uint32_t)
9770iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
9771{
9772 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9773 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9774
9775 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9776 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9777
9778 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9779
9780 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9781
9782 /* Free but don't flush the source register. */
9783 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9784 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9785
9786 return off;
9787}
9788
9789
9790#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
9791 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
9792
9793#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
9794 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
9795
9796/** Emits code for IEM_MC_FETCH_XREG_U64. */
9797DECL_INLINE_THROW(uint32_t)
9798iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
9799{
9800 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9801 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9802
9803 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9804 kIemNativeGstSimdRegLdStSz_Low128,
9805 kIemNativeGstRegUse_ReadOnly);
9806
9807 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9808 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9809
9810 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9811
9812 /* Free but don't flush the source register. */
9813 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9814 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9815
9816 return off;
9817}
9818
9819
9820#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
9821 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
9822
9823#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
9824 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
9825
9826/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
9827DECL_INLINE_THROW(uint32_t)
9828iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
9829{
9830 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9831 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9832
9833 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9834 kIemNativeGstSimdRegLdStSz_Low128,
9835 kIemNativeGstRegUse_ReadOnly);
9836
9837 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9838 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9839
9840 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9841
9842 /* Free but don't flush the source register. */
9843 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9844 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9845
9846 return off;
9847}
9848
9849
9850#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
9851 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
9852
9853/** Emits code for IEM_MC_FETCH_XREG_U16. */
9854DECL_INLINE_THROW(uint32_t)
9855iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
9856{
9857 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9858 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9859
9860 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9861 kIemNativeGstSimdRegLdStSz_Low128,
9862 kIemNativeGstRegUse_ReadOnly);
9863
9864 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9865 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9866
9867 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
9868
9869 /* Free but don't flush the source register. */
9870 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9871 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9872
9873 return off;
9874}
9875
9876
9877#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
9878 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
9879
9880/** Emits code for IEM_MC_FETCH_XREG_U8. */
9881DECL_INLINE_THROW(uint32_t)
9882iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
9883{
9884 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9885 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
9886
9887 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9888 kIemNativeGstSimdRegLdStSz_Low128,
9889 kIemNativeGstRegUse_ReadOnly);
9890
9891 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9892 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9893
9894 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
9895
9896 /* Free but don't flush the source register. */
9897 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9898 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9899
9900 return off;
9901}
9902
9903
9904#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
9905 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
9906
9907AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
9908#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
9909 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
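/* Note: the AssertCompileSize above is what lets IEM_MC_STORE_XREG_XMM reuse the
   IEM_MC_STORE_XREG_U128 emitter; both value types are 16 bytes wide. */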
9910
9911
9912/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9913DECL_INLINE_THROW(uint32_t)
9914iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9915{
9916 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9917 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9918
9919 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9920 kIemNativeGstSimdRegLdStSz_Low128,
9921 kIemNativeGstRegUse_ForFullWrite);
9922 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9923
9924 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9925
9926 /* Free but don't flush the source register. */
9927 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9928 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9929
9930 return off;
9931}
9932
9933
9934#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
9935 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
9936
9937#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
9938 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
9939
9940#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
9941 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
9942
9943#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
9944 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
9945
9946#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
9947 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
9948
9949#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
9950 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
9951
9952/** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8/IEM_MC_STORE_XREG_R32/IEM_MC_STORE_XREG_R64. */
9953DECL_INLINE_THROW(uint32_t)
9954iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
9955 uint8_t cbLocal, uint8_t iElem)
9956{
9957 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9958 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
9959
9960#ifdef VBOX_STRICT
9961 switch (cbLocal)
9962 {
9963 case sizeof(uint64_t): Assert(iElem < 2); break;
9964 case sizeof(uint32_t): Assert(iElem < 4); break;
9965 case sizeof(uint16_t): Assert(iElem < 8); break;
9966 case sizeof(uint8_t): Assert(iElem < 16); break;
9967 default: AssertFailed();
9968 }
9969#endif
9970
9971 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9972 kIemNativeGstSimdRegLdStSz_Low128,
9973 kIemNativeGstRegUse_ForUpdate);
9974 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9975
9976 switch (cbLocal)
9977 {
9978 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9979 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9980 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9981 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9982 default: AssertFailed();
9983 }
9984
9985 /* Free but don't flush the source register. */
9986 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9987 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9988
9989 return off;
9990}
9991
9992
9993#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
9994 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
9995
9996/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
9997DECL_INLINE_THROW(uint32_t)
9998iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9999{
10000 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10001 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10002
10003 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10004 kIemNativeGstSimdRegLdStSz_Low128,
10005 kIemNativeGstRegUse_ForUpdate);
10006 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10007
10008 /* Zero the vector register first, then store the 64-bit value in the low 64 bits. */
10009 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10010 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10011
10012 /* Free but don't flush the source register. */
10013 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10014 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10015
10016 return off;
10017}
10018
10019
10020#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
10021 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
10022
10023/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
10024DECL_INLINE_THROW(uint32_t)
10025iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10026{
10027 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10028 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10029
10030 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10031 kIemNativeGstSimdRegLdStSz_Low128,
10032 kIemNativeGstRegUse_ForUpdate);
10033 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10034
10035 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
10036 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10037 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10038
10039 /* Free but don't flush the source register. */
10040 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10041 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10042
10043 return off;
10044}
10045
10046
10047#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
10048 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
10049
10050/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
10051DECL_INLINE_THROW(uint32_t)
10052iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
10053 uint8_t idxSrcVar, uint8_t iDwSrc)
10054{
10055 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10056 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10057
10058 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10059 kIemNativeGstSimdRegLdStSz_Low128,
10060 kIemNativeGstRegUse_ForUpdate);
10061 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10062
10063 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
10064 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
10065
10066 /* Free but don't flush the destination register. */
10067 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10068 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10069
10070 return off;
10071}
10072
10073
10074#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10075 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10076
10077/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
10078DECL_INLINE_THROW(uint32_t)
10079iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10080{
10081 /*
10082 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10083 * if iYRegDst gets allocated first for the full write it won't load the
10084 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10085 * duplicated from the already allocated host register for iYRegDst containing
10086 * garbage. This will be caught by the guest register value checking in debug
10087 * builds.
10088 */
10089 if (iYRegDst != iYRegSrc)
10090 {
10091 /* Allocate destination and source register. */
10092 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10093 kIemNativeGstSimdRegLdStSz_256,
10094 kIemNativeGstRegUse_ForFullWrite);
10095 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10096 kIemNativeGstSimdRegLdStSz_Low128,
10097 kIemNativeGstRegUse_ReadOnly);
10098
10099 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10100 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10101
10102 /* Free but don't flush the source and destination register. */
10103 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10104 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10105 }
10106 else
10107 {
10108 /* This effectively only clears the upper 128 bits of the register. */
10109 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10110 kIemNativeGstSimdRegLdStSz_High128,
10111 kIemNativeGstRegUse_ForFullWrite);
10112
10113 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10114
10115 /* Free but don't flush the destination register. */
10116 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10117 }
10118
10119 return off;
10120}
10121
10122
10123#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10124 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10125
10126/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
10127DECL_INLINE_THROW(uint32_t)
10128iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10129{
10130 /*
10131 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10132 * if iYRegDst gets allocated first for the full write it won't load the
10133 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10134 * duplicated from the already allocated host register for iYRegDst containing
10135 * garbage. This will be caught by the guest register value checking in debug
10136 * builds. The iYRegSrc == iYRegDst case would effectively only clear the upper 256 bits
10137 * of a ZMM register, which we don't support yet, so this is just a nop.
10138 */
10139 if (iYRegDst != iYRegSrc)
10140 {
10141 /* Allocate destination and source register. */
10142 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10143 kIemNativeGstSimdRegLdStSz_256,
10144 kIemNativeGstRegUse_ReadOnly);
10145 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10146 kIemNativeGstSimdRegLdStSz_256,
10147 kIemNativeGstRegUse_ForFullWrite);
10148
10149 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10150
10151 /* Free but don't flush the source and destination register. */
10152 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10153 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10154 }
10155
10156 return off;
10157}
10158
10159
10160#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
10161 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
10162
10163/** Emits code for IEM_MC_FETCH_YREG_U128. */
10164DECL_INLINE_THROW(uint32_t)
10165iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
10166{
10167 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10168 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
10169
10170 Assert(iDQWord <= 1);
10171 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10172 iDQWord == 1
10173 ? kIemNativeGstSimdRegLdStSz_High128
10174 : kIemNativeGstSimdRegLdStSz_Low128,
10175 kIemNativeGstRegUse_ReadOnly);
10176
10177 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10178 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10179
10180 if (iDQWord == 1)
10181 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10182 else
10183 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10184
10185 /* Free but don't flush the source register. */
10186 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10187 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10188
10189 return off;
10190}
10191
10192
10193#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
10194 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
10195
10196/** Emits code for IEM_MC_FETCH_YREG_U64. */
10197DECL_INLINE_THROW(uint32_t)
10198iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
10199{
10200 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10201 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10202
10203 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10204 iQWord >= 2
10205 ? kIemNativeGstSimdRegLdStSz_High128
10206 : kIemNativeGstSimdRegLdStSz_Low128,
10207 kIemNativeGstRegUse_ReadOnly);
10208
10209 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10210 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10211
10212 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
10213
10214 /* Free but don't flush the source register. */
10215 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10216 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10217
10218 return off;
10219}
10220
10221
10222#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
10223 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
10224
10225/** Emits code for IEM_MC_FETCH_YREG_U32. */
10226DECL_INLINE_THROW(uint32_t)
10227iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
10228{
10229 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10230 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10231
10232 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10233 iDWord >= 4
10234 ? kIemNativeGstSimdRegLdStSz_High128
10235 : kIemNativeGstSimdRegLdStSz_Low128,
10236 kIemNativeGstRegUse_ReadOnly);
10237
10238 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10239 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10240
10241 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
10242
10243 /* Free but don't flush the source register. */
10244 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10245 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10246
10247 return off;
10248}
10249
10250
10251#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
10252 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
10253
10254/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
10255DECL_INLINE_THROW(uint32_t)
10256iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10257{
10258 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10259 kIemNativeGstSimdRegLdStSz_High128,
10260 kIemNativeGstRegUse_ForFullWrite);
10261
10262 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10263
10264 /* Free but don't flush the register. */
10265 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10266
10267 return off;
10268}
10269
10270
10271#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
10272 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
10273
10274/** Emits code for IEM_MC_STORE_YREG_U128. */
10275DECL_INLINE_THROW(uint32_t)
10276iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
10277{
10278 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10279 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10280
10281 Assert(iDQword <= 1);
10282 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10283 iDQword == 0
10284 ? kIemNativeGstSimdRegLdStSz_Low128
10285 : kIemNativeGstSimdRegLdStSz_High128,
10286 kIemNativeGstRegUse_ForFullWrite);
10287
10288 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10289
10290 if (iDQword == 0)
10291 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10292 else
10293 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
10294
10295 /* Free but don't flush the source register. */
10296 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10297 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10298
10299 return off;
10300}
10301
10302
10303#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10304 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10305
10306/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
10307DECL_INLINE_THROW(uint32_t)
10308iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10309{
10310 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10311 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10312
10313 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10314 kIemNativeGstSimdRegLdStSz_256,
10315 kIemNativeGstRegUse_ForFullWrite);
10316
10317 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10318
10319 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10320 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10321
10322 /* Free but don't flush the source register. */
10323 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10324 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10325
10326 return off;
10327}
10328
10329
10330#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
10331 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
10332
10333/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
10334DECL_INLINE_THROW(uint32_t)
10335iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10336{
10337 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10338 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10339
10340 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10341 kIemNativeGstSimdRegLdStSz_256,
10342 kIemNativeGstRegUse_ForFullWrite);
10343
10344 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10345
10346 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10347 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10348
10349 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10350 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10351
10352 return off;
10353}
10354
10355
10356#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
10357 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
10358
10359/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
10360DECL_INLINE_THROW(uint32_t)
10361iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10362{
10363 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10364 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10365
10366 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10367 kIemNativeGstSimdRegLdStSz_256,
10368 kIemNativeGstRegUse_ForFullWrite);
10369
10370 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10371
10372 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10373 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10374
10375 /* Free but don't flush the source register. */
10376 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10377 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10378
10379 return off;
10380}
10381
10382
10383#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
10384 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
10385
10386/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
10387DECL_INLINE_THROW(uint32_t)
10388iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10389{
10390 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10391 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10392
10393 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10394 kIemNativeGstSimdRegLdStSz_256,
10395 kIemNativeGstRegUse_ForFullWrite);
10396
10397 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10398
10399 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10400 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10401
10402 /* Free but don't flush the source register. */
10403 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10404 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10405
10406 return off;
10407}
10408
10409
10410#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
10411 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
10412
10413/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
10414DECL_INLINE_THROW(uint32_t)
10415iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10416{
10417 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10418 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10419
10420 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10421 kIemNativeGstSimdRegLdStSz_256,
10422 kIemNativeGstRegUse_ForFullWrite);
10423
10424 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10425
10426 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10427 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10428
10429 /* Free but don't flush the source register. */
10430 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10431 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10432
10433 return off;
10434}
10435
10436
10437#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
10438 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
10439
10440/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
10441DECL_INLINE_THROW(uint32_t)
10442iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10443{
10444 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10445 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10446
10447 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10448 kIemNativeGstSimdRegLdStSz_256,
10449 kIemNativeGstRegUse_ForFullWrite);
10450
10451 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10452
10453 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10454
10455 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10456 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10457
10458 return off;
10459}
10460
10461
10462#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
10463 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
10464
10465/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
10466DECL_INLINE_THROW(uint32_t)
10467iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10468{
10469 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10470 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10471
10472 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10473 kIemNativeGstSimdRegLdStSz_256,
10474 kIemNativeGstRegUse_ForFullWrite);
10475
10476 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10477
10478 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10479
10480 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10481 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10482
10483 return off;
10484}
10485
10486
10487#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10488 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10489
10490/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
10491DECL_INLINE_THROW(uint32_t)
10492iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10493{
10494 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10495 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10496
10497 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10498 kIemNativeGstSimdRegLdStSz_256,
10499 kIemNativeGstRegUse_ForFullWrite);
10500
10501 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10502
10503 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10504
10505 /* Free but don't flush the source register. */
10506 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10507 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10508
10509 return off;
10510}
10511
10512
10513#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10514 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10515
10516/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
10517DECL_INLINE_THROW(uint32_t)
10518iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10519{
10520 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10521 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10522
10523 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10524 kIemNativeGstSimdRegLdStSz_256,
10525 kIemNativeGstRegUse_ForFullWrite);
10526
10527 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10528
10529 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10530
10531 /* Free but don't flush the source register. */
10532 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10533 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10534
10535 return off;
10536}
10537
10538
10539#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10540 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10541
10542/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
10543DECL_INLINE_THROW(uint32_t)
10544iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10545{
10546 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10547 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10548
10549 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10550 kIemNativeGstSimdRegLdStSz_256,
10551 kIemNativeGstRegUse_ForFullWrite);
10552
10553 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10554
10555 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
10556
10557 /* Free but don't flush the source register. */
10558 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10559 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10560
10561 return off;
10562}
10563
10564
10565#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10566 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10567
10568/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
10569DECL_INLINE_THROW(uint32_t)
10570iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10571{
10572 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10573 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10574
10575 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10576 kIemNativeGstSimdRegLdStSz_256,
10577 kIemNativeGstRegUse_ForFullWrite);
10578
10579 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10580
10581 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10582 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
10583
10584 /* Free but don't flush the source register. */
10585 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10586 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10587
10588 return off;
10589}
10590
10591
10592#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10593 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10594
10595/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
10596DECL_INLINE_THROW(uint32_t)
10597iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10598{
10599 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10600 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10601
10602 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10603 kIemNativeGstSimdRegLdStSz_256,
10604 kIemNativeGstRegUse_ForFullWrite);
10605
10606 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10607
10608 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10609 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10610
10611 /* Free but don't flush the source register. */
10612 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10613 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10614
10615 return off;
10616}
10617
10618
10619#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
10620 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
10621
10622/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
10623DECL_INLINE_THROW(uint32_t)
10624iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
10625{
10626 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10627 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10628
10629 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10630 kIemNativeGstSimdRegLdStSz_256,
10631 kIemNativeGstRegUse_ForFullWrite);
10632 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10633 kIemNativeGstSimdRegLdStSz_Low128,
10634 kIemNativeGstRegUse_ReadOnly);
10635 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10636
10637 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10638 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10639 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10640
10641 /* Free but don't flush the source and destination registers. */
10642 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10643 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10644 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10645
10646 return off;
10647}
10648
10649
10650#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
10651 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
10652
10653/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
10654DECL_INLINE_THROW(uint32_t)
10655iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
10656{
10657 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10658 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10659
10660 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10661 kIemNativeGstSimdRegLdStSz_256,
10662 kIemNativeGstRegUse_ForFullWrite);
10663 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10664 kIemNativeGstSimdRegLdStSz_Low128,
10665 kIemNativeGstRegUse_ReadOnly);
10666 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10667
10668 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10669 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
10670 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10671
10672 /* Free but don't flush the source and destination registers. */
10673 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10674 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10675 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10676
10677 return off;
10678}
10679
10680
10681#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
10682 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
10683
10684
10685/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
10686DECL_INLINE_THROW(uint32_t)
10687iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
10688{
10689 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10690 kIemNativeGstSimdRegLdStSz_Low128,
10691 kIemNativeGstRegUse_ForUpdate);
10692
10693 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
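    /* (Hypothetical example: a mask of 0xf clears all four dwords and could instead be emitted
        as a single iemNativeEmitSimdZeroVecRegLowU128 call zeroing the whole low 128 bits.) */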
10694 if (bImm8Mask & RT_BIT(0))
10695 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
10696 if (bImm8Mask & RT_BIT(1))
10697 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
10698 if (bImm8Mask & RT_BIT(2))
10699 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
10700 if (bImm8Mask & RT_BIT(3))
10701 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
10702
10703 /* Free but don't flush the destination register. */
10704 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10705
10706 return off;
10707}
10708
10709
10710#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
10711 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
10712
10713#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
10714 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
10715
10716/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
10717DECL_INLINE_THROW(uint32_t)
10718iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
10719{
10720 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10721 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
10722
10723 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10724 kIemNativeGstSimdRegLdStSz_256,
10725 kIemNativeGstRegUse_ReadOnly);
10726 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10727
10728 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
10729
10730 /* Free but don't flush the source register. */
10731 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10732 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10733
10734 return off;
10735}
10736
10737
10738#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
10739 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
10740
10741#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
10742 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
10743
10744/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
10745DECL_INLINE_THROW(uint32_t)
10746iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
10747{
10748 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10749 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10750
10751 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10752 kIemNativeGstSimdRegLdStSz_256,
10753 kIemNativeGstRegUse_ForFullWrite);
10754 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10755
10756 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
10757
10758 /* Free but don't flush the source register. */
10759 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10760 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10761
10762 return off;
10763}
10764
10765
10766#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
10767 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
10768
10769
10770/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
10771DECL_INLINE_THROW(uint32_t)
10772iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
10773 uint8_t idxSrcVar, uint8_t iDwSrc)
10774{
10775 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10776 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10777
10778 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10779 iDwDst < 4
10780 ? kIemNativeGstSimdRegLdStSz_Low128
10781 : kIemNativeGstSimdRegLdStSz_High128,
10782 kIemNativeGstRegUse_ForUpdate);
10783 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10784 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10785
10786 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
10787 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
10788
10789 /* Free but don't flush the source register. */
10790 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10791 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10792 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10793
10794 return off;
10795}
10796
10797
10798#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
10799 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
10800
10801
10802/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
10803DECL_INLINE_THROW(uint32_t)
10804iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
10805 uint8_t idxSrcVar, uint8_t iQwSrc)
10806{
10807 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10808 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10809
10810 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10811 iQwDst < 2
10812 ? kIemNativeGstSimdRegLdStSz_Low128
10813 : kIemNativeGstSimdRegLdStSz_High128,
10814 kIemNativeGstRegUse_ForUpdate);
10815 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10816 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10817
10818 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
10819 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
10820
10821 /* Free but don't flush the source register. */
10822 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10823 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10824 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10825
10826 return off;
10827}
10828
10829
10830#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
10831 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
10832
10833
10834/** Emits code for IEM_MC_STORE_YREG_U64. */
10835DECL_INLINE_THROW(uint32_t)
10836iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
10837{
10838 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10839 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10840
10841 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10842 iQwDst < 2
10843 ? kIemNativeGstSimdRegLdStSz_Low128
10844 : kIemNativeGstSimdRegLdStSz_High128,
10845 kIemNativeGstRegUse_ForUpdate);
10846
10847 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10848
10849 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
10850
10851 /* Free but don't flush the source register. */
10852 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10853 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10854
10855 return off;
10856}
10857
10858
10859#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
10860 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
10861
10862/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
10863DECL_INLINE_THROW(uint32_t)
10864iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10865{
10866 RT_NOREF(pReNative, iYReg);
10867 /** @todo Needs to be implemented when support for AVX-512 is added. */
10868 return off;
10869}
10870
10871
10872
10873/*********************************************************************************************************************************
10874* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
10875*********************************************************************************************************************************/
10876
10877/**
10878 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
10879 */
10880DECL_INLINE_THROW(uint32_t)
10881iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs, uint8_t idxInstr)
10882{
10883 /* Grab the MXCSR register; it must not be call volatile or we would end up freeing it when setting up the call below. */
10884 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
10885 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
10886 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
10887
10888#if 0 /* This is not required right now as the called helper will set up the SSE/AVX state if it is an assembly one. */
10889 /*
10890 * Need to do the FPU preparation.
10891 */
10892 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
10893#endif
10894
10895 /*
10896 * Do all the call setup and cleanup.
10897 */
10898 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
10899 false /*fFlushPendingWrites*/);
10900
10901 /*
10902 * Load the MXCSR register into the first argument and mask out the current exception flags.
10903 */
10904 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
10905 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
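    /* Passing in a copy with the exception flags cleared means the MXCSR value returned by the
       helper carries only the flags the operation itself raised; those are OR'ed back into the
       guest MXCSR further down. */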
10906
10907 /*
10908 * Make the call.
10909 */
10910 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
10911
10912 /*
10913 * The updated MXCSR is in the return register, update exception status flags.
10914 *
10915 * The return register is marked allocated as a temporary because it is required for the
10916 * exception generation check below.
10917 */
10918 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG)));
10919 uint8_t const idxRegTmp = iemNativeRegMarkAllocated(pReNative, IEMNATIVE_CALL_RET_GREG, kIemNativeWhat_Tmp);
10920 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
10921
10922#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
10923 /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
10924 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
10925#endif
10926
10927 /*
10928 * Make sure we don't have any outstanding guest register writes as we may
10929 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
10930 */
10931 off = iemNativeRegFlushPendingWrites(pReNative, off);
10932
10933#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10934 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10935#else
10936 RT_NOREF(idxInstr);
10937#endif
10938
10939 /** @todo r=aeichner ANDN from BMI1 would save us a temporary and an additional instruction here, but I don't
10940 * want to assume the existence of this instruction at the moment. */
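    /* What the emitted code below boils down to, expressed in C (mxcsr being the updated value
       in the return register):
           if (mxcsr & X86_MXCSR_XCPT_FLAGS & ~((mxcsr & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT))
               -> take the RaiseSseAvxFpRelated TB exit,
       i.e. exit if any pending exception flag is unmasked. */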
10941 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
10942
10943 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp2, idxRegTmp);
10944 /* tmp &= X86_MXCSR_XCPT_MASK */
10945 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
10946 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
10947 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
10948 /* tmp = ~tmp */
10949 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
10950 /* tmp &= mxcsr */
10951 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegTmp2);
10952 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
10953 kIemNativeLabelType_RaiseSseAvxFpRelated);
10954
10955 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
10956 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10957 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
10958
10959 return off;
10960}
10961
10962
10963#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
10964 off = iemNativeEmitCallSseAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
10965
10966/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
10967DECL_INLINE_THROW(uint32_t)
10968iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10969{
10970 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10971 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10972 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
10973}
10974
10975
10976#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
10977 off = iemNativeEmitCallSseAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
10978
10979/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
10980DECL_INLINE_THROW(uint32_t)
10981iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
10982 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10983{
10984 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10985 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10986 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10987 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
10988}
10989
10990
10991/*********************************************************************************************************************************
10992* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
10993*********************************************************************************************************************************/
10994
10995#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
10996 off = iemNativeEmitCallAvxAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
10997
10998/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
10999DECL_INLINE_THROW(uint32_t)
11000iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
11001{
11002 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11003 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11004 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
11005}
11006
11007
11008#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
11009 off = iemNativeEmitCallAvxAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
11010
11011/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
11012DECL_INLINE_THROW(uint32_t)
11013iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
11014 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
11015{
11016 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11017 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11018 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11019 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
11020}
11021
11022
11023#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
11024
11025
11026/*********************************************************************************************************************************
11027* Include instruction emitters. *
11028*********************************************************************************************************************************/
11029#include "target-x86/IEMAllN8veEmit-x86.h"
11030