VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@105855

Last change on this file was 105855, checked in by vboxsync on 2024-08-23

VMM/IEM: Don't update PC during IEM_MC_REL_JMP_S[8|32]_AND_FINISH if we can avoid it. This extends the offPc to 64-bit and tries to make the stats more accurate. This is more on todo 4 in bugref:10720. bugref:10373

1/* $Id: IEMAllN8veRecompFuncs.h 105855 2024-08-23 23:12:23Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
 65 * Narrow down the configs here to avoid wasting time on unused ones.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
82# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
83#endif
84
85
86/*********************************************************************************************************************************
87* Code emitters for flushing pending guest register writes and sanity checks *
88*********************************************************************************************************************************/
89
90#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
91
92# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
93/**
94 * Updates IEMCPU::uPcUpdatingDebug.
95 */
96DECL_INLINE_THROW(uint32_t) iemNativeEmitPcDebugAdd(PIEMRECOMPILERSTATE pReNative, uint32_t off, int64_t offDisp, uint8_t cBits)
97{
98# ifdef RT_ARCH_AMD64
99 if (pReNative->Core.fDebugPcInitialized && cBits >= 32)
100 {
101 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
102 if ((int32_t)offDisp == offDisp || cBits != 64)
103 {
104 /* add [q]word [pVCpu->iem.s.uPcUpdatingDebug], imm32/imm8 */
105 if (cBits == 64)
106 pCodeBuf[off++] = X86_OP_REX_W;
107 pCodeBuf[off++] = (int8_t)offDisp == offDisp ? 0x83 : 0x81;
108 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
109 if ((int8_t)offDisp == offDisp)
110 pCodeBuf[off++] = (int8_t)offDisp;
111 else
112 {
113 *(int32_t *)&pCodeBuf[off] = (int32_t)offDisp;
114 off += sizeof(int32_t);
115 }
116 }
117 else
118 {
119 /* mov tmp0, imm64 */
120 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, offDisp);
121
122 /* add [pVCpu->iem.s.uPcUpdatingDebug], tmp0 */
123 if (cBits == 64)
124 pCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 >= 8 ? X86_OP_REX_R : 0);
125 else if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
126 pCodeBuf[off++] = X86_OP_REX_R;
127 pCodeBuf[off++] = 0x01;
128 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0 & 7,
129 RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
130 }
131 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
132 return off;
133 }
134# endif
135
136 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
137 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 12);
138
139 if (pReNative->Core.fDebugPcInitialized)
140 {
141 Log4(("uPcUpdatingDebug+=%ld cBits=%d off=%#x\n", offDisp, cBits, off));
142 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
143 }
144 else
145 {
146 Log4(("uPcUpdatingDebug=rip+%ld cBits=%d off=%#x\n", offDisp, cBits, off));
147 pReNative->Core.fDebugPcInitialized = true;
148 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
149 }
150
151 if (cBits == 64)
152 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxTmpReg, offDisp, IEMNATIVE_REG_FIXED_TMP0);
153 else
154 {
155 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxTmpReg, (int32_t)offDisp, IEMNATIVE_REG_FIXED_TMP0);
156 if (cBits == 16)
157 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, UINT16_MAX);
158 }
159
160 off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug),
161 IEMNATIVE_REG_FIXED_TMP0);
162
163 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
164 iemNativeRegFreeTmp(pReNative, idxTmpReg);
165 return off;
166}
167
168
169# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
170DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
171{
172 /* Compare the shadow with the context value, they should match. */
173 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
174 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
175 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
176 return off;
177}
178# endif
179
180#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
181
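/**
 * Editorial sketch of the delayed PC updating idea used throughout this file,
 * shown as plain C rather than emitted native code.  The struct and function
 * names here are illustrative only; the real emitters work on
 * pReNative->Core.offPc and the emitted instruction stream.
 *
 * @code
 *      typedef struct DELAYEDPCMODEL
 *      {
 *          uint64_t uCommittedRip;     // what CPUMCTX.rip currently holds
 *          int64_t  offPc;             // advance not yet folded into rip
 *      } DELAYEDPCMODEL;
 *
 *      // IEM_MC_ADVANCE_RIP_AND_FINISH and friends only bump the pending offset.
 *      static void delayedPcAdvance(DELAYEDPCMODEL *pModel, uint8_t cbInstr)
 *      {
 *          pModel->offPc += cbInstr;
 *      }
 *
 *      // A flush folds the accumulated offset into rip with a single addition.
 *      static void delayedPcFlush(DELAYEDPCMODEL *pModel)
 *      {
 *          pModel->uCommittedRip += (uint64_t)pModel->offPc;
 *          pModel->offPc          = 0;
 *      }
 * @endcode
 */
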
182/**
183 * Flushes delayed write of a specific guest register.
184 *
185 * This must be called prior to calling CImpl functions and any helpers that use
186 * the guest state (like raising exceptions) and such.
187 *
188 * This optimization has not yet been implemented. The first target would be
189 * RIP updates, since these are the most common ones.
190 */
191DECL_INLINE_THROW(uint32_t)
192iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
193{
194#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
 195 /* If, for whatever reason, it becomes possible to reference the PC register at some point, we need to do the writeback here first. */
196#endif
197
198#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
199#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
200 if ( enmClass == kIemNativeGstRegRef_EFlags
201 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
202 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
203#else
204 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
205#endif
206
207 if ( enmClass == kIemNativeGstRegRef_Gpr
208 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
209 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
210#endif
211
212#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
213 if ( enmClass == kIemNativeGstRegRef_XReg
214 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
215 {
216 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
 217 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
218 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
219
220 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
221 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
222 }
223#endif
224 RT_NOREF(pReNative, enmClass, idxReg);
225 return off;
226}
227
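/**
 * Editorial usage sketch for iemNativeRegFlushPendingSpecificWrite (the
 * surrounding names are real; idxGstReg is a hypothetical caller variable):
 * a ref-taking emitter would flush any delayed write-back of the referenced
 * register before handing out the reference.
 *
 * @code
 *      // Make sure a delayed write to guest GPR idxGstReg has hit CPUMCTX
 *      // before code that dereferences the register reference runs.
 *      off = iemNativeRegFlushPendingSpecificWrite(pReNative, off,
 *                                                  kIemNativeGstRegRef_Gpr, idxGstReg);
 * @endcode
 */
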
228
229
230/*********************************************************************************************************************************
231* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
232*********************************************************************************************************************************/
233
234#undef IEM_MC_BEGIN /* unused */
235#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
236 { \
237 Assert(pReNative->Core.bmVars == 0); \
238 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
239 Assert(pReNative->Core.bmStack == 0); \
240 pReNative->fMc = (a_fMcFlags); \
241 pReNative->fCImpl = (a_fCImplFlags); \
242 pReNative->cArgsX = (a_cArgsIncludingHidden)
243
244/** We have to get to the end in recompilation mode, as otherwise we won't
245 * generate code for all the IEM_MC_IF_XXX branches. */
246#define IEM_MC_END() \
247 iemNativeVarFreeAll(pReNative); \
248 } return off
249
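/**
 * Editorial sketch of what an expanded MC block looks like: IEM_MC_BEGIN_EX
 * and IEM_MC_END bracket a plain C scope in the generated recompiler function,
 * and the IEM_MC_XXX statements in between become emitter calls advancing
 * 'off'.  The flag and argument values below are hypothetical.
 *
 * @code
 *      {
 *          Assert(pReNative->Core.bmVars == 0);
 *          Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
 *          Assert(pReNative->Core.bmStack == 0);
 *          pReNative->fMc    = IEM_MC_F_64BIT;     // a_fMcFlags
 *          pReNative->fCImpl = 0;                  // a_fCImplFlags
 *          pReNative->cArgsX = 2;                  // a_cArgsIncludingHidden
 *
 *          // ... IEM_MC_XXX statements: off = iemNativeEmitXxx(pReNative, off, ...); ...
 *
 *          iemNativeVarFreeAll(pReNative);
 *      } return off;
 * @endcode
 */
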
250
251
252/*********************************************************************************************************************************
253* Native Emitter Support. *
254*********************************************************************************************************************************/
255
256#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
257
258#define IEM_MC_NATIVE_ELSE() } else {
259
260#define IEM_MC_NATIVE_ENDIF() } ((void)0)
261
262
263#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
264 off = a_fnEmitter(pReNative, off)
265
266#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
267 off = a_fnEmitter(pReNative, off, (a0))
268
269#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
270 off = a_fnEmitter(pReNative, off, (a0), (a1))
271
272#define IEM_MC_NATIVE_EMIT_2_EX(a_fnEmitter, a0, a1) \
273 off = a_fnEmitter(pReNative, off, pCallEntry->idxInstr, (a0), (a1))
274
275#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
276 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
277
278#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
279 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
280
281#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
282 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
283
284#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
285 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
286
287#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
288 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
289
290#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
291 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
292
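/**
 * Editorial usage sketch for the native emitter MCs above; the emitter and
 * helper names (iemNativeEmit_some_fast_path, iemAImpl_some_helper) are
 * hypothetical.  An instruction body can provide a native fast path for
 * selected hosts and fall back to the generic assembly/C helper elsewhere.
 *
 * @code
 *      IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64) {
 *          IEM_MC_NATIVE_EMIT_2(iemNativeEmit_some_fast_path, idxVarDst, idxVarSrc);
 *      } IEM_MC_NATIVE_ELSE() {
 *          IEM_MC_CALL_VOID_AIMPL_2(iemAImpl_some_helper, pDst, uSrc);
 *      } IEM_MC_NATIVE_ENDIF();
 * @endcode
 */
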
293
294#ifndef RT_ARCH_AMD64
295# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
296#else
297/** @note This is a naive approach that ASSUMES that the register isn't
298 * allocated, so it only works safely for the first allocation(s) in
299 * a MC block. */
300# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
301 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
302
303DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off);
304
305DECL_INLINE_THROW(uint32_t)
306iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
307{
308 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
309 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
310 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
311
312# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
 313 /* Must flush the register if it holds pending writes. */
314 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
315 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
316 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
317# endif
318
319 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off);
320 return off;
321}
322
323#endif /* RT_ARCH_AMD64 */
324
325
326
327/*********************************************************************************************************************************
 328* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
329*********************************************************************************************************************************/
330
331#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
332 pReNative->fMc = 0; \
333 pReNative->fCImpl = (a_fFlags); \
334 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
335 a_cbInstr) /** @todo not used ... */
336
337
338#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
339 pReNative->fMc = 0; \
340 pReNative->fCImpl = (a_fFlags); \
341 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
342
343DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
344 uint8_t idxInstr, uint64_t a_fGstShwFlush,
345 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
346{
347 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
348}
349
350
351#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
352 pReNative->fMc = 0; \
353 pReNative->fCImpl = (a_fFlags); \
354 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
355 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
356
357DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
358 uint8_t idxInstr, uint64_t a_fGstShwFlush,
359 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
360{
361 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
362}
363
364
365#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
366 pReNative->fMc = 0; \
367 pReNative->fCImpl = (a_fFlags); \
368 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
369 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
370
371DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
372 uint8_t idxInstr, uint64_t a_fGstShwFlush,
373 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
374 uint64_t uArg2)
375{
376 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
377}
378
379
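/**
 * Editorial sketch of what a deferral amounts to at runtime: the emitted
 * native code ends up calling the C implementation with the guest context,
 * roughly like the line below for the two-argument case (argument loading,
 * shadow flushing per a_fGstShwFlush and status dispatching are all done by
 * iemNativeEmitCImplCall).
 *
 * @code
 *      VBOXSTRICTRC rcStrict = pfnCImpl(pVCpu, cbInstr, uArg0, uArg1);
 * @endcode
 */
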
380
381/*********************************************************************************************************************************
382* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
383*********************************************************************************************************************************/
384
385/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
386 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
387DECL_INLINE_THROW(uint32_t)
388iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
389{
390 /*
 391 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
392 * return with special status code and make the execution loop deal with
393 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
394 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
395 * could continue w/o interruption, it probably will drop into the
 396 * debugger, so not worth the effort of trying to service it here and we
397 * just lump it in with the handling of the others.
398 *
399 * To simplify the code and the register state management even more (wrt
 400 * immediate in the AND operation), we always update the flags and skip the
 401 * extra check and its associated conditional jump.
402 */
403 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
404 <= UINT32_MAX);
405#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
406 AssertMsg( pReNative->idxCurCall == 0
407 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
408 IEMLIVENESSBIT_IDX_EFL_OTHER)),
409 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
410 IEMLIVENESSBIT_IDX_EFL_OTHER)));
411#endif
412
413 /*
 414 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
415 * any pending register writes must be flushed.
416 */
417 off = iemNativeRegFlushPendingWrites(pReNative, off);
418
419 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
420 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
421 true /*fSkipLivenessAssert*/);
422 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxEflReg,
423 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
424 kIemNativeLabelType_ReturnWithFlags);
425 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
426 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
427
428 /* Free but don't flush the EFLAGS register. */
429 iemNativeRegFreeTmp(pReNative, idxEflReg);
430
431 return off;
432}
433
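/**
 * Editorial plain-C approximation of the check emitted above (the function
 * name is hypothetical and VINF_IEM_REEXEC_BREAK merely stands in for the
 * ReturnWithFlags TB exit):
 *
 * @code
 *      static VBOXSTRICTRC finishInstructionFlagsCheck(PVMCPUCC pVCpu)
 *      {
 *          uint32_t const fEfl = pVCpu->cpum.GstCtx.eflags.u;
 *          if (fEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
 *              return VINF_IEM_REEXEC_BREAK;   // leave the TB, let the execution loop sort it out
 *          pVCpu->cpum.GstCtx.eflags.u = fEfl & ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
 *          return VINF_SUCCESS;
 *      }
 * @endcode
 */
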
434
435/** Helper for iemNativeEmitFinishInstructionWithStatus. */
436DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
437{
438 unsigned const offOpcodes = pCallEntry->offOpcode;
439 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
440 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
441 {
442 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
443 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
444 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
445 }
446 AssertFailedReturn(NIL_RTGCPHYS);
447}
448
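/**
 * Editorial worked example for iemNativeCallEntryToGCPhysPc with two opcode
 * ranges (the values are made up):
 *      aRanges[0]: offOpcodes=0,  cbOpcodes=16, offPhysPage=0xf80  (page A)
 *      aRanges[1]: offOpcodes=16, cbOpcodes=8,  offPhysPage=0x000  (page B)
 * A call entry with offOpcode=18 misses range 0 (18 - 0 >= 16) and hits
 * range 1 at offset 2, so the function returns
 * iemTbGetRangePhysPageAddr(pTb, 1) + 2 + 0x000, i.e. the instruction's
 * physical address on page B.
 */
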
449
 450/** Emits the instruction finishing tail for a_rcNormal; a no-op dummy for VINF_SUCCESS. */
451template<int const a_rcNormal, bool const a_fIsJump>
452DECL_FORCE_INLINE_THROW(uint32_t)
453iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
454 int32_t const offJump)
455{
456 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
457 if (a_rcNormal != VINF_SUCCESS)
458 {
459#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
460 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
461#else
462 RT_NOREF_PV(pCallEntry);
463#endif
464
465 /* As this code returns from the TB any pending register writes must be flushed. */
466 off = iemNativeRegFlushPendingWrites(pReNative, off);
467
468 /*
469 * If we're in a conditional, mark the current branch as exiting so we
470 * can disregard its state when we hit the IEM_MC_ENDIF.
471 */
472 iemNativeMarkCurCondBranchAsExiting(pReNative);
473
474 /*
475 * Use the lookup table for getting to the next TB quickly.
476 * Note! In this code path there can only be one entry at present.
477 */
478 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
479 PCIEMTB const pTbOrg = pReNative->pTbOrg;
480 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
481 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
482
483#if 0
484 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
485 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
486 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
487 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
488 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
489
490 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
491
492#else
493 /* Load the index as argument #1 for the helper call at the given label. */
494 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
495
496 /*
497 * Figure out the physical address of the current instruction and see
498 * whether the next instruction we're about to execute is in the same
 499 * page, so we can optimistically skip the TLB loading.
500 *
501 * - This is safe for all cases in FLAT mode.
 502 * - In segmented modes it is complicated, given that a negative
 503 * jump may underflow EIP and a forward jump may overflow or run into
 504 * CS.LIM, triggering a #GP. The only thing we can get away with
505 * now at compile time is forward jumps w/o CS.LIM checks, since the
506 * lack of CS.LIM checks means we're good for the entire physical page
507 * we're executing on and another 15 bytes before we run into CS.LIM.
508 */
509 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
510# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
511 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
512# endif
513 )
514 {
515 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
516 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
517 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
518 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
519
520 {
521 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
522 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
523
524 /* Load the key lookup flags into the 2nd argument for the helper call.
525 - This is safe wrt CS limit checking since we're only here for FLAT modes.
526 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
527 interrupt shadow.
528 - The NMI inhibiting is more questionable, though... */
529 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
530 * Should we copy it into fExec to simplify this? OTOH, it's just a
531 * couple of extra instructions if EFLAGS are already in a register. */
532 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
533 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
534
535 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
536 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookup);
537 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithIrq);
538 }
539 }
540 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
541 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlb);
542 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq);
543#endif
544 }
545 return off;
546}
547
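/**
 * The same-page test used above, isolated into a self-contained editorial
 * sketch (the function name is hypothetical):
 *
 * @code
 *      // True when the next instruction to execute starts on the same guest
 *      // page as the current one, so the TB lookup may skip the TLB load.
 *      static bool isNextInstrOnSamePage(uint64_t GCPhysPcCurrent, uint8_t cbOpcode, int64_t offJump)
 *      {
 *          uint64_t const GCPhysPcNext = GCPhysPcCurrent + cbOpcode + offJump;
 *          return (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
 *              && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= cbOpcode;
 *      }
 * @endcode
 */
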
548
549#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
550 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
551 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
552
553#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
554 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
555 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
556 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
557
558/** Same as iemRegAddToRip64AndFinishingNoFlags. */
559DECL_INLINE_THROW(uint32_t)
560iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
561{
562#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
563# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
564 if (!pReNative->Core.offPc)
565 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
566# endif
567
568 /* Allocate a temporary PC register. */
569 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
570
571 /* Perform the addition and store the result. */
572 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
573 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
574
575 /* Free but don't flush the PC register. */
576 iemNativeRegFreeTmp(pReNative, idxPcReg);
577#endif
578
579#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
580 pReNative->Core.offPc += cbInstr;
581 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
582# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
583 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 64);
584 off = iemNativeEmitPcDebugCheck(pReNative, off);
585# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
586 off = iemNativePcAdjustCheck(pReNative, off);
587# endif
588 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
589#endif
590
591 return off;
592}
593
594
595#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
596 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
597 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
598
599#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
600 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
601 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
602 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
603
604/** Same as iemRegAddToEip32AndFinishingNoFlags. */
605DECL_INLINE_THROW(uint32_t)
606iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
607{
608#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
609# ifdef IEMNATIVE_REG_FIXED_PC_DBG
610 if (!pReNative->Core.offPc)
611 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
612# endif
613
614 /* Allocate a temporary PC register. */
615 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
616
617 /* Perform the addition and store the result. */
618 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
619 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
620
621 /* Free but don't flush the PC register. */
622 iemNativeRegFreeTmp(pReNative, idxPcReg);
623#endif
624
625#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
626 pReNative->Core.offPc += cbInstr;
627 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
628# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
629 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 32);
630 off = iemNativeEmitPcDebugCheck(pReNative, off);
631# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
632 off = iemNativePcAdjustCheck(pReNative, off);
633# endif
634 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
635#endif
636
637 return off;
638}
639
640
641#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
642 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
643 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
644
645#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
646 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
647 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
648 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
649
650/** Same as iemRegAddToIp16AndFinishingNoFlags. */
651DECL_INLINE_THROW(uint32_t)
652iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
653{
654#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
655# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
656 if (!pReNative->Core.offPc)
657 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
658# endif
659
660 /* Allocate a temporary PC register. */
661 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
662
663 /* Perform the addition and store the result. */
664 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
665 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
666 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
667
668 /* Free but don't flush the PC register. */
669 iemNativeRegFreeTmp(pReNative, idxPcReg);
670#endif
671
672#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
673 pReNative->Core.offPc += cbInstr;
674 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
675# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
676 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 16);
677 off = iemNativeEmitPcDebugCheck(pReNative, off);
678# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
679 off = iemNativePcAdjustCheck(pReNative, off);
680# endif
681 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
682#endif
683
684 return off;
685}
686
687
688
689/*********************************************************************************************************************************
690* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
691*********************************************************************************************************************************/
692
693#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
694 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
695 (a_enmEffOpSize), pCallEntry->idxInstr); \
696 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
697
698#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
699 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
700 (a_enmEffOpSize), pCallEntry->idxInstr); \
701 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
702 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
703
704#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
705 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
706 IEMMODE_16BIT, pCallEntry->idxInstr); \
707 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
708
709#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
710 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
711 IEMMODE_16BIT, pCallEntry->idxInstr); \
712 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
713 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
714
715#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
716 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
717 IEMMODE_64BIT, pCallEntry->idxInstr); \
718 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
719
720#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
721 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
722 IEMMODE_64BIT, pCallEntry->idxInstr); \
723 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
724 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
725
726
727#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
728 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
729 (a_enmEffOpSize), pCallEntry->idxInstr); \
730 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
731
732#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
733 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
734 (a_enmEffOpSize), pCallEntry->idxInstr); \
735 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
736 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
737
738#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG(a_i16, a_cbInstr, a_rcNormal) \
739 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
740 IEMMODE_16BIT, pCallEntry->idxInstr); \
741 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
742
743#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
744 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
745 IEMMODE_16BIT, pCallEntry->idxInstr); \
746 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
747 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
748
749#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG(a_i32, a_cbInstr, a_rcNormal) \
750 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
751 IEMMODE_64BIT, pCallEntry->idxInstr); \
752 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
753
754#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
755 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
756 IEMMODE_64BIT, pCallEntry->idxInstr); \
757 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
758 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
759
760/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
761 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
762 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
763template<bool const a_fWithinPage>
764DECL_INLINE_THROW(uint32_t)
765iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
766 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
767{
768 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
769#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
770 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
771 if (a_fWithinPage && enmEffOpSize == IEMMODE_64BIT)
772 {
773 pReNative->Core.offPc += (int64_t)offDisp + cbInstr;
774# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
775 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
776# endif
777 }
778 else
779#endif
780 {
781 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
782 off = iemNativeRegFlushPendingWrites(pReNative, off);
783#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
784 Assert(pReNative->Core.offPc == 0);
785#endif
786 /* Allocate a temporary PC register. */
787 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
788
789 /* Perform the addition. */
790 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr + pReNative->Core.offPc);
791
792 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
793 {
794 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't.
795 We can skip this if the target is within the same page. */
796 if (!a_fWithinPage)
797 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
798 }
799 else
800 {
801 /* Just truncate the result to 16-bit IP. */
802 Assert(enmEffOpSize == IEMMODE_16BIT);
803 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
804 }
805#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
806 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
807 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
808#endif
809
810 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
811
812 /* Free but don't flush the PC register. */
813 iemNativeRegFreeTmp(pReNative, idxPcReg);
814 }
815 return off;
816}
817
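/**
 * Editorial sketch of the guest-visible semantics implemented by the emitter
 * above (the function name is hypothetical; the within-page specialization
 * simply skips the canonical check and defers the RIP store):
 *
 * @code
 *      static VBOXSTRICTRC guestRip64RelativeJump(PVMCPUCC pVCpu, uint8_t cbInstr, int32_t offDisp, IEMMODE enmEffOpSize)
 *      {
 *          uint64_t uNewRip = pVCpu->cpum.GstCtx.rip + cbInstr + (int64_t)offDisp;
 *          if (enmEffOpSize == IEMMODE_16BIT)
 *              uNewRip &= UINT16_MAX;                          // truncate to 16-bit IP
 *          else if (!IEM_IS_CANONICAL(uNewRip))
 *              return iemRaiseGeneralProtectionFault0(pVCpu);  // #GP(0) and TB exit
 *          pVCpu->cpum.GstCtx.rip = uNewRip;
 *          return VINF_SUCCESS;
 *      }
 * @endcode
 */
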
818
819#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
820 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
821 (a_enmEffOpSize), pCallEntry->idxInstr); \
822 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
823
824#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
825 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
826 (a_enmEffOpSize), pCallEntry->idxInstr); \
827 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
828 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
829
830#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
831 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
832 IEMMODE_16BIT, pCallEntry->idxInstr); \
833 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
834
835#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
836 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
837 IEMMODE_16BIT, pCallEntry->idxInstr); \
838 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
839 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
840
841#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
842 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
843 IEMMODE_32BIT, pCallEntry->idxInstr); \
844 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
845
846#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
847 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
848 IEMMODE_32BIT, pCallEntry->idxInstr); \
849 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
850 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
851
852
853#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
854 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
855 (a_enmEffOpSize), pCallEntry->idxInstr); \
856 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
857
858#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
859 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
860 (a_enmEffOpSize), pCallEntry->idxInstr); \
861 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
862 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
863
864#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT(a_i16, a_cbInstr, a_rcNormal) \
865 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
866 IEMMODE_16BIT, pCallEntry->idxInstr); \
867 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
868
869#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
870 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
871 IEMMODE_16BIT, pCallEntry->idxInstr); \
872 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
873 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
874
875#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT(a_i32, a_cbInstr, a_rcNormal) \
876 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
877 IEMMODE_32BIT, pCallEntry->idxInstr); \
878 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
879
880#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
881 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
882 IEMMODE_32BIT, pCallEntry->idxInstr); \
883 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
884 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
885
886/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
887 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
888 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
889template<bool const a_fFlat>
890DECL_INLINE_THROW(uint32_t)
891iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
892 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
893{
894 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
895#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
896 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
897#endif
898
899 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
900 if (!a_fFlat || enmEffOpSize == IEMMODE_16BIT)
901 {
902 off = iemNativeRegFlushPendingWrites(pReNative, off);
903#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
904 Assert(pReNative->Core.offPc == 0);
905#endif
906 }
907
908 /* Allocate a temporary PC register. */
909 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
910
911 /* Perform the addition. */
912#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
913 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr + (int32_t)pReNative->Core.offPc);
914#else
915 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr + (int32_t)pReNative->Core.offPc);
916#endif
917
918 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
919 if (enmEffOpSize == IEMMODE_16BIT)
920 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
921
922 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
923 if (!a_fFlat)
924 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
925
926 /* Commit it. */
927#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
928 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, enmEffOpSize == IEMMODE_32BIT ? 32 : 16);
929 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
930#endif
931
932 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
933#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
934 pReNative->Core.offPc = 0;
935#endif
936
937 /* Free but don't flush the PC register. */
938 iemNativeRegFreeTmp(pReNative, idxPcReg);
939
940 return off;
941}
942
943
944#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
945 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
946 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
947
948#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
949 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
950 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
951 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
952
953#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
954 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
955 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
956
957#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
958 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
959 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
960 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
961
962#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
963 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
964 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
965
966#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
967 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
968 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
969 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
970
971/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
972DECL_INLINE_THROW(uint32_t)
973iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
974 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
975{
976 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
977 off = iemNativeRegFlushPendingWrites(pReNative, off);
978
979#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
980 Assert(pReNative->Core.offPc == 0);
981 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
982#endif
983
984 /* Allocate a temporary PC register. */
985 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
986
987 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
988 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
989 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
990 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
991#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
992 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, 16);
993 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
994#endif
995 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
996
997 /* Free but don't flush the PC register. */
998 iemNativeRegFreeTmp(pReNative, idxPcReg);
999
1000 return off;
1001}
1002
1003
1004
1005/*********************************************************************************************************************************
 1006* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
1007*********************************************************************************************************************************/
1008
1009/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
1010#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
1011 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1012
1013/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
1014#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
1015 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1016
1017/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
1018#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
1019 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1020
1021/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
1022 * clears flags. */
1023#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
1024 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
1025 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1026
1027/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
1028 * clears flags. */
1029#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
1030 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
1031 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1032
1033/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
1034 * clears flags. */
1035#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
1036 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
1037 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1038
1039#undef IEM_MC_SET_RIP_U16_AND_FINISH
1040
1041
1042/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
1043#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
1044 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1045
1046/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
1047#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
1048 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1049
1050/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
1051 * clears flags. */
1052#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
1053 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
1054 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1055
1056/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
1057 * and clears flags. */
1058#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
1059 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
1060 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1061
1062#undef IEM_MC_SET_RIP_U32_AND_FINISH
1063
1064
1065/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
1066#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
1067 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1068
1069/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
1070 * and clears flags. */
1071#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
1072 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
1073 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1074
1075#undef IEM_MC_SET_RIP_U64_AND_FINISH
1076
1077
1078/** Same as iemRegRipJumpU16AndFinishNoFlags,
1079 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
1080DECL_INLINE_THROW(uint32_t)
1081iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
1082 uint8_t idxInstr, uint8_t cbVar)
1083{
1084 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1085 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1086
1087 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1088 off = iemNativeRegFlushPendingWrites(pReNative, off);
1089
1090#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1091 Assert(pReNative->Core.offPc == 0);
1092 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1093#endif
1094
1095 /* Get a register with the new PC loaded from idxVarPc.
 1096 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1097 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
1098
1099 /* Check limit (may #GP(0) + exit TB). */
1100 if (!f64Bit)
1101/** @todo we can skip this test in FLAT 32-bit mode. */
1102 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1103 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1104 else if (cbVar > sizeof(uint32_t))
1105 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1106
1107 /* Store the result. */
1108 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1109
1110#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1111 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1112 pReNative->Core.fDebugPcInitialized = true;
1113 Log4(("uPcUpdatingDebug=rip off=%#x\n", off));
1114#endif
1115
1116 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1117 /** @todo implicitly free the variable? */
1118
1119 return off;
1120}
1121
1122
1123
1124/*********************************************************************************************************************************
1125* Emitters for changing PC/RIP/EIP/IP with an indirect or relative call (IEM_MC_IND_CALL_UXX_AND_FINISH, IEM_MC_REL_CALL_SXX_AND_FINISH) (requires stack emitters). *
1126*********************************************************************************************************************************/
1127
1128/** @todo These helpers naturally belong to the stack push API, but we already need them up here (we could of course move
1129 * them below the stack emitters, but then they would not be close to the rest of the PC/RIP handling...). */
1130DECL_FORCE_INLINE_THROW(uint32_t)
1131iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1132{
1133 /* Use16BitSp: */
1134#ifdef RT_ARCH_AMD64
1135 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1136 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1137#else
1138 /* sub regeff, regrsp, #cbMem */
1139 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
1140 /* and regeff, regeff, #0xffff */
1141 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1142 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
1143 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0, keeping the rest of RSP. */
1144 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
1145#endif
1146 return off;
1147}
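/* Example: with SP=0x0000 and cbMem=2 the sequence above yields an effective
   stack address of 0xfffe and leaves the low 16 bits of RSP at 0xfffe, while
   the upper bits of RSP are preserved (16-bit SP wrap-around semantics). */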
1148
1149
1150DECL_FORCE_INLINE(uint32_t)
1151iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1152{
1153 /* Use32BitSp: */
1154 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
1155 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1156 return off;
1157}
1158
1159
1160DECL_INLINE_THROW(uint32_t)
1161iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
1162 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
1163{
1164 /*
1165 * Assert sanity.
1166 */
1167#ifdef VBOX_STRICT
1168 if (RT_BYTE2(cBitsVarAndFlat) != 0)
1169 {
1170 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
1171 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
1172 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
1173 Assert( pfnFunction
1174 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1175 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
1176 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1177 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
1178 : UINT64_C(0xc000b000a0009000) ));
1179 }
1180 else
1181 Assert( pfnFunction
1182 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
1183 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
1184 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
1185 : UINT64_C(0xc000b000a0009000) ));
1186#endif
1187
1188#ifdef VBOX_STRICT
1189 /*
1190 * Check that the fExec flags we've got make sense.
1191 */
1192 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1193#endif
1194
1195 /*
1196 * To keep things simple we have to commit any pending writes first as we
1197 * may end up making calls.
1198 */
1199 /** @todo we could postpone this till we make the call and reload the
1200 * registers after returning from the call. Not sure if that's sensible or
1201 * not, though. */
1202 off = iemNativeRegFlushPendingWrites(pReNative, off);
1203
1204 /*
1205 * First we calculate the new RSP and the effective stack pointer value.
1206 * For 64-bit mode and flat 32-bit these two are the same.
1207 * (Code structure is very similar to that of PUSH)
1208 */
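    /* Note! cBitsVarAndFlat packs the push width (in bits) into the first byte,
       the flat-mode width into the second and a segment-register indicator into
       the third; e.g. RT_MAKE_U32_FROM_U8(32, 0, 0, 0) is a 32-bit push in a
       non-flat mode and RT_MAKE_U32_FROM_U8(64, 64, 0, 0) a 64-bit push in flat
       64-bit mode, so RT_BYTE1() / 8 below yields the access size in bytes. */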
1209 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
1210 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
1211 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
1212 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
1213 ? cbMem : sizeof(uint16_t);
1214 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
1215 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1216 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1217 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1218 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1219 if (cBitsFlat != 0)
1220 {
1221 Assert(idxRegEffSp == idxRegRsp);
1222 Assert(cBitsFlat == 32 || cBitsFlat == 64);
1223 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1224 if (cBitsFlat == 64)
1225 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1226 else
1227 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1228 }
1229 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1230 {
1231 Assert(idxRegEffSp != idxRegRsp);
1232 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1233 kIemNativeGstRegUse_ReadOnly);
1234#ifdef RT_ARCH_AMD64
1235 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1236#else
1237 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1238#endif
1239 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1240 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1241 offFixupJumpToUseOtherBitSp = off;
1242 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1243 {
1244 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1245 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1246 }
1247 else
1248 {
1249 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1250 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1251 }
1252 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1253 }
1254 /* SpUpdateEnd: */
1255 uint32_t const offLabelSpUpdateEnd = off;
1256
1257 /*
1258 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1259 * we're skipping lookup).
1260 */
1261 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1262 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
1263 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1264 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1265 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1266 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1267 : UINT32_MAX;
1268 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1269
1270
1271 if (!TlbState.fSkip)
1272 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1273 else
1274 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1275
1276 /*
1277 * Use16BitSp:
1278 */
1279 if (cBitsFlat == 0)
1280 {
1281#ifdef RT_ARCH_AMD64
1282 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1283#else
1284 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1285#endif
1286 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1287 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1288 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1289 else
1290 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1291 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1292 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1293 }
1294
1295 /*
1296 * TlbMiss:
1297 *
1298 * Call helper to do the pushing.
1299 */
1300 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1301
1302#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1303 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1304#else
1305 RT_NOREF(idxInstr);
1306#endif
1307
1308 /* Save variables in volatile registers. */
1309 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1310 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1311 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1312 | (RT_BIT_32(idxRegPc));
1313 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1314
1315 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1316 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1317 {
1318 /* Swap them using ARG0 as temp register: */
1319 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1320 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1321 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1322 }
1323 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1324 {
1325 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1326 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1327
1328 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1329 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1330 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1331 }
1332 else
1333 {
1334 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1335 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1336
1337 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1338 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1339 }
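    /* Note! The ordering above matters: when idxRegPc already sits in ARG1 and
       idxRegEffSp in ARG2, a straight copy would clobber one of them, hence
       the three-way swap through ARG0 in the first branch. */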
1340
1341 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1342 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1343
1344 /* Done setting up parameters, make the call. */
1345 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1346
1347 /* Restore variables and guest shadow registers to volatile registers. */
1348 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1349 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1350
1351#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1352 if (!TlbState.fSkip)
1353 {
1354 /* end of TlbMiss - Jump to the done label. */
1355 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1356 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1357
1358 /*
1359 * TlbLookup:
1360 */
1361 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
1362 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1363
1364 /*
1365 * Emit code to do the actual storing / fetching.
1366 */
1367 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1368# ifdef IEM_WITH_TLB_STATISTICS
1369 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1370 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1371# endif
1372 switch (cbMemAccess)
1373 {
1374 case 2:
1375 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1376 break;
1377 case 4:
1378 if (!fIsIntelSeg)
1379 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1380 else
1381 {
1382 /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
1383 PUSH FS in real mode, so we have to try to emulate that here.
1384 We borrow the now unused idxReg1 from the TLB lookup code here. */
1385 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
1386 kIemNativeGstReg_EFlags);
1387 if (idxRegEfl != UINT8_MAX)
1388 {
1389#ifdef RT_ARCH_AMD64
1390 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
1391 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1392 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1393#else
1394 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
1395 off, TlbState.idxReg1, idxRegEfl,
1396 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1397#endif
1398 iemNativeRegFreeTmp(pReNative, idxRegEfl);
1399 }
1400 else
1401 {
1402 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
1403 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
1404 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1405 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1406 }
1407 /* ASSUMES the upper half of idxRegPc is ZERO. */
1408 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
1409 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
1410 }
1411 break;
1412 case 8:
1413 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1414 break;
1415 default:
1416 AssertFailed();
1417 }
1418
1419 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1420 TlbState.freeRegsAndReleaseVars(pReNative);
1421
1422 /*
1423 * TlbDone:
1424 *
1425 * Commit the new RSP value.
1426 */
1427 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1428 }
1429#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1430
1431#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1432 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1433#endif
1434 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1435 if (idxRegEffSp != idxRegRsp)
1436 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1437
1438 return off;
1439}
1440
1441
1442/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1443#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1444 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1445
1446/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1447 * clears flags. */
1448#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1449 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1450 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1451
1452/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1453#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1454 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1455
1456/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1457 * clears flags. */
1458#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1459 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1460 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1461
1462#undef IEM_MC_IND_CALL_U16_AND_FINISH
1463
1464
1465/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1466#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1467 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1468
1469/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1470 * clears flags. */
1471#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1472 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1473 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1474
1475#undef IEM_MC_IND_CALL_U32_AND_FINISH
1476
1477
1478/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1479 * an extra parameter, for use in 64-bit code. */
1480#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1481 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1482
1483
1484/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1485 * an extra parameter, for use in 64-bit code and we need to check and clear
1486 * flags. */
1487#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1488 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1489 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1490
1491#undef IEM_MC_IND_CALL_U64_AND_FINISH
1492
1493/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1494 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1495DECL_INLINE_THROW(uint32_t)
1496iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1497 uint8_t idxInstr, uint8_t cbVar)
1498{
1499 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1500 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1501
1502 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1503 off = iemNativeRegFlushPendingWrites(pReNative, off);
1504
1505#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1506 Assert(pReNative->Core.offPc == 0);
1507 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1508#endif
1509
1510 /* Get a register with the new PC loaded from idxVarPc.
1511 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1512 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1513
1514 /* Check limit (may #GP(0) + exit TB). */
1515 if (!f64Bit)
1516/** @todo we can skip this test in FLAT 32-bit mode. */
1517 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1518 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1519 else if (cbVar > sizeof(uint32_t))
1520 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1521
1522#if 1
1523 /* Allocate a temporary PC register, we don't want it shadowed. */
1524 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1525 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1526#else
1527 /* Allocate a temporary PC register. */
1528 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1529 true /*fNoVolatileRegs*/);
1530#endif
1531
1532 /* Perform the addition and push the variable to the guest stack. */
1533 /** @todo Flat variants for PC32 variants. */
1534 switch (cbVar)
1535 {
1536 case sizeof(uint16_t):
1537 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1538 /* Truncate the result to 16-bit IP. */
1539 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1540 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1541 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1542 break;
1543 case sizeof(uint32_t):
1544 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1545 /** @todo In FLAT mode we can use the flat variant. */
1546 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1547 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1548 break;
1549 case sizeof(uint64_t):
1550 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1551 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1552 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1553 break;
1554 default:
1555 AssertFailed();
1556 }
1557
1558 /* RSP got changed, so do this again. */
1559 off = iemNativeRegFlushPendingWrites(pReNative, off);
1560
1561 /* Store the result. */
1562 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1563#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1564 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1565 pReNative->Core.fDebugPcInitialized = true;
1566 Log4(("uPcUpdatingDebug=rip/indirect-call off=%#x\n", off));
1567#endif
1568
1569#if 1
1570 /* Need to transfer the shadow information to the new RIP register. */
1571 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1572#else
1573 /* Sync the new PC. */
1574 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1575#endif
1576 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1577 iemNativeRegFreeTmp(pReNative, idxPcReg);
1578 /** @todo implicitly free the variable? */
1579
1580 return off;
1581}
1582
1583
1584/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1585 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1586#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1587 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1588
1589/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1590 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1591 * flags. */
1592#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1593 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1594 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1595
1596/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1597 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1598#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1599 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1600
1601/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1602 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1603 * flags. */
1604#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1605 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1606 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1607
1608/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1609 * an extra parameter, for use in 64-bit code. */
1610#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
1611 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1612
1613/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1614 * an extra parameter, for use in 64-bit code and we need to check and clear
1615 * flags. */
1616#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
1617 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
1618 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1619
1620#undef IEM_MC_REL_CALL_S16_AND_FINISH
1621
1622/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1623 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1624DECL_INLINE_THROW(uint32_t)
1625iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
1626 uint8_t idxInstr)
1627{
1628 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1629 off = iemNativeRegFlushPendingWrites(pReNative, off);
1630
1631#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1632 Assert(pReNative->Core.offPc == 0);
1633 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1634#endif
1635
1636 /* Allocate a temporary PC register. */
1637 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1638 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1639 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1640
1641 /* Calculate the new RIP. */
1642 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1643 /* Truncate the result to 16-bit IP. */
1644 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
1645 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1646 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1647
1648 /* Truncate the result to 16-bit IP. */
1649 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
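    /* Example: with IP=0xfffe, cbInstr=3 and offDisp=+0x10 the pushed return
       address is (0xfffe + 3) & 0xffff = 0x0001 and the new IP becomes
       (0x0001 + 0x10) & 0xffff = 0x0011, i.e. both wrap within the segment. */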
1650
1651 /* Check limit (may #GP(0) + exit TB). */
1652 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1653
1654 /* Push the return address (idxPcRegOld) to the guest stack. */
1655 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1656 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1657
1658 /* RSP got changed, so flush again. */
1659 off = iemNativeRegFlushPendingWrites(pReNative, off);
1660
1661 /* Store the result. */
1662 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1663#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1664 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1665 pReNative->Core.fDebugPcInitialized = true;
1666 Log4(("uPcUpdatingDebug=rip/rel-call-16 off=%#x offDisp=%d\n", off, offDisp));
1667#endif
1668
1669 /* Need to transfer the shadow information to the new RIP register. */
1670 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1671 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1672 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1673
1674 return off;
1675}
1676
1677
1678/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1679 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1680#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
1681 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
1682
1683/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1684 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1685 * flags. */
1686#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
1687 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
1688 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1689
1690#undef IEM_MC_REL_CALL_S32_AND_FINISH
1691
1692/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1693 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1694DECL_INLINE_THROW(uint32_t)
1695iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
1696 uint8_t idxInstr)
1697{
1698 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1699 off = iemNativeRegFlushPendingWrites(pReNative, off);
1700
1701#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1702 Assert(pReNative->Core.offPc == 0);
1703 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1704#endif
1705
1706 /* Allocate a temporary PC register. */
1707 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1708 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1709 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1710
1711 /* Update the EIP to get the return address. */
1712 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1713
1714 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1715 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1716 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1717 /** @todo we can skip this test in FLAT 32-bit mode. */
1718 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1719
1720 /* Push the return address to the guest stack. */
1721 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
1722 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1723 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1724
1725 /* RSP got changed, so do this again. */
1726 off = iemNativeRegFlushPendingWrites(pReNative, off);
1727
1728 /* Store the result. */
1729 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1730#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1731 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1732 pReNative->Core.fDebugPcInitialized = true;
1733 Log4(("uPcUpdatingDebug=eip/rel-call-32 off=%#x offDisp=%d\n", off, offDisp));
1734#endif
1735
1736 /* Need to transfer the shadow information to the new RIP register. */
1737 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1738 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1739 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1740
1741 return off;
1742}
1743
1744
1745/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1746 * an extra parameter, for use in 64-bit code. */
1747#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
1748 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
1749
1750/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1751 * an extra parameter, for use in 64-bit code and we need to check and clear
1752 * flags. */
1753#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
1754 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
1755 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1756
1757#undef IEM_MC_REL_CALL_S64_AND_FINISH
1758
1759/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1760 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1761DECL_INLINE_THROW(uint32_t)
1762iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
1763 uint8_t idxInstr)
1764{
1765 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1766 off = iemNativeRegFlushPendingWrites(pReNative, off);
1767
1768#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1769 Assert(pReNative->Core.offPc == 0);
1770 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1771#endif
1772
1773 /* Allocate a temporary PC register. */
1774 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1775 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1776 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1777
1778 /* Update the RIP to get the return address. */
1779 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
1780
1781 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1782 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
1783 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
1784 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1785
1786 /* Push the return address to the guest stack. */
1787 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1788 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1789
1790 /* RSP got changed, so do this again. */
1791 off = iemNativeRegFlushPendingWrites(pReNative, off);
1792
1793 /* Store the result. */
1794 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1795#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1796 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1797 pReNative->Core.fDebugPcInitialized = true;
1798 Log4(("uPcUpdatingDebug=rip/rel-call-64 off=%#x offDisp=%ld\n", off, offDisp));
1799#endif
1800
1801 /* Need to transfer the shadow information to the new RIP register. */
1802 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1803 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1804 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1805
1806 return off;
1807}
1808
1809
1810/*********************************************************************************************************************************
1811* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters). *
1812*********************************************************************************************************************************/
1813
1814DECL_FORCE_INLINE_THROW(uint32_t)
1815iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1816 uint16_t cbPopAdd, uint8_t idxRegTmp)
1817{
1818 /* Use16BitSp: */
1819#ifdef RT_ARCH_AMD64
1820 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1821 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1822 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
1823 RT_NOREF(idxRegTmp);
1824
1825#elif defined(RT_ARCH_ARM64)
1826 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
1827 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
1828 /* add tmp, regrsp, #cbMem */
1829 uint16_t const cbCombined = cbMem + cbPopAdd;
1830 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
1831 if (cbCombined >= RT_BIT_32(12))
1832 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
1833 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
1834 /* and tmp, tmp, #0xffff */
1835 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1836 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
1837 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
1838 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
1839
1840#else
1841# error "Port me"
1842#endif
1843 return off;
1844}
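/* Example: with SP=0xfffe, cbMem=2 and cbPopAdd=4 (retn 4) the return address
   is read from effective address 0xfffe and the low 16 bits of RSP become
   (0xfffe + 2 + 4) & 0xffff = 0x0004, again with 16-bit wrap-around. */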
1845
1846
1847DECL_FORCE_INLINE_THROW(uint32_t)
1848iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1849 uint16_t cbPopAdd)
1850{
1851 /* Use32BitSp: */
1852 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1853 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
1854 return off;
1855}
1856
1857
1858/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
1859#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
1860 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
1861
1862/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
1863#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1864 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1865
1866/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
1867#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1868 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1869
1870/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
1871 * clears flags. */
1872#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
1873 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
1874 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1875
1876/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
1877 * clears flags. */
1878#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1879 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1880 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1881
1882/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
1883 * clears flags. */
1884#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1885 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1886 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1887
1888/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
1889DECL_INLINE_THROW(uint32_t)
1890iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
1891 IEMMODE enmEffOpSize, uint8_t idxInstr)
1892{
1893 RT_NOREF(cbInstr);
1894
1895#ifdef VBOX_STRICT
1896 /*
1897 * Check that the fExec flags we've got make sense.
1898 */
1899 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1900#endif
1901
1902 /*
1903 * To keep things simple we have to commit any pending writes first as we
1904 * may end up making calls.
1905 */
1906 off = iemNativeRegFlushPendingWrites(pReNative, off);
1907
1908 /*
1909 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
1910 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
1911 * directly as the effective stack pointer.
1912 * (Code structure is very similar to that of PUSH)
1913 *
1914 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
1915 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
1916 * aren't commonly used (or useful) and thus not in need of optimizing.
1917 *
1918 * Note! For non-flat modes the guest RSP is not allocated for update but rather for calculation,
1919 * as the shadowed register would otherwise remain modified even if the return address throws a \#GP(0)
1920 * due to being outside the CS limit, leaving a wrong stack pointer value in the guest (see
1921 * the near return testcase in bs3-cpu-basic-2). If no exception is thrown the shadowing is transferred
1922 * to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
1923 */
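    /* Example: a 32-bit 'retn 8' in flat mode fetches the 4 byte return address
       at [ESP] and then advances the stack pointer by sizeof(uint32_t) + cbPop,
       i.e. by 12 bytes (see the FLAT RSP calculation after TlbDone below). */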
1924 uint8_t const cbMem = enmEffOpSize == IEMMODE_64BIT
1925 ? sizeof(uint64_t)
1926 : enmEffOpSize == IEMMODE_32BIT
1927 ? sizeof(uint32_t)
1928 : sizeof(uint16_t);
1929 bool const fFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
1930 uintptr_t const pfnFunction = fFlat
1931 ? enmEffOpSize == IEMMODE_64BIT
1932 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
1933 : (uintptr_t)iemNativeHlpStackFlatFetchU32
1934 : enmEffOpSize == IEMMODE_32BIT
1935 ? (uintptr_t)iemNativeHlpStackFetchU32
1936 : (uintptr_t)iemNativeHlpStackFetchU16;
1937 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1938 fFlat ? kIemNativeGstRegUse_ForUpdate
1939 : kIemNativeGstRegUse_Calculation,
1940 true /*fNoVolatileRegs*/);
1941 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1942 /** @todo can do a better job picking the register here. For cbMem >= 4 this
1943 * will be the resulting register value. */
1944 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
1945
1946 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1947 if (fFlat)
1948 Assert(idxRegEffSp == idxRegRsp);
1949 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1950 {
1951 Assert(idxRegEffSp != idxRegRsp);
1952 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1953 kIemNativeGstRegUse_ReadOnly);
1954#ifdef RT_ARCH_AMD64
1955 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1956#else
1957 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1958#endif
1959 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1960 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1961 offFixupJumpToUseOtherBitSp = off;
1962 if (enmEffOpSize == IEMMODE_32BIT)
1963 {
1964 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1965 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1966 }
1967 else
1968 {
1969 Assert(enmEffOpSize == IEMMODE_16BIT);
1970 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1971 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1972 idxRegMemResult);
1973 }
1974 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1975 }
1976 /* SpUpdateEnd: */
1977 uint32_t const offLabelSpUpdateEnd = off;
1978
1979 /*
1980 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1981 * we're skipping lookup).
1982 */
1983 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
1984 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
1985 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1986 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1987 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1988 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1989 : UINT32_MAX;
1990
1991 if (!TlbState.fSkip)
1992 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1993 else
1994 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1995
1996 /*
1997 * Use16BitSp:
1998 */
1999 if (!fFlat)
2000 {
2001#ifdef RT_ARCH_AMD64
2002 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2003#else
2004 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2005#endif
2006 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
2007 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
2008 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
2009 idxRegMemResult);
2010 else
2011 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
2012 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
2013 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2014 }
2015
2016 /*
2017 * TlbMiss:
2018 *
2019 * Call helper to do the pushing.
2020 */
2021 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
2022
2023#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2024 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2025#else
2026 RT_NOREF(idxInstr);
2027#endif
2028
2029 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
2030 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
2031 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
2032 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
2033
2034
2035 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
2036 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
2037 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
2038
2039 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
2040 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2041
2042 /* Done setting up parameters, make the call. */
2043 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
2044
2045 /* Move the return register content to idxRegMemResult. */
2046 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
2047 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
2048
2049 /* Restore variables and guest shadow registers to volatile registers. */
2050 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
2051 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
2052
2053#ifdef IEMNATIVE_WITH_TLB_LOOKUP
2054 if (!TlbState.fSkip)
2055 {
2056 /* end of TlbMiss - Jump to the done label. */
2057 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
2058 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
2059
2060 /*
2061 * TlbLookup:
2062 */
2063 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
2064 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
2065
2066 /*
2067 * Emit code to load the value (address in idxRegMemResult, result placed back in idxRegMemResult).
2068 */
2069 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2070# ifdef IEM_WITH_TLB_STATISTICS
2071 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
2072 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
2073# endif
2074 switch (cbMem)
2075 {
2076 case 2:
2077 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2078 break;
2079 case 4:
2080 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2081 break;
2082 case 8:
2083 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2084 break;
2085 default:
2086 AssertFailed();
2087 }
2088
2089 TlbState.freeRegsAndReleaseVars(pReNative);
2090
2091 /*
2092 * TlbDone:
2093 *
2094 * Set the new RSP value (FLAT accesses need to calculate it first) and
2095 * commit the popped register value.
2096 */
2097 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
2098 }
2099#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
2100
2101 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
2102 if (!f64Bit)
2103/** @todo we can skip this test in FLAT 32-bit mode. */
2104 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2105 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2106 else if (enmEffOpSize == IEMMODE_64BIT)
2107 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2108
2109 /* Complete RSP calculation for FLAT mode. */
2110 if (idxRegEffSp == idxRegRsp)
2111 {
2112 if (enmEffOpSize == IEMMODE_64BIT)
2113 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
2114 else
2115 {
2116 Assert(enmEffOpSize == IEMMODE_32BIT);
2117 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
2118 }
2119 }
2120
2121 /* Commit the result and clear any current guest shadows for RIP. */
2122 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
2123 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2124 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
2125#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2126 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2127 pReNative->Core.fDebugPcInitialized = true;
2128 Log4(("uPcUpdatingDebug=rip/ret off=%#x\n", off));
2129#endif
2130
2131 /* Need to transfer the shadowing information to the host register containing the updated value now. */
2132 if (!fFlat)
2133 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
2134
2135 iemNativeRegFreeTmp(pReNative, idxRegRsp);
2136 if (idxRegEffSp != idxRegRsp)
2137 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
2138 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
2139 return off;
2140}
2141
2142
2143/*********************************************************************************************************************************
2144* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
2145*********************************************************************************************************************************/
2146
2147#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
2148 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2149
2150/**
2151 * Emits code to check if a \#NM exception should be raised.
2152 *
2153 * @returns New code buffer offset, UINT32_MAX on failure.
2154 * @param pReNative The native recompile state.
2155 * @param off The code buffer offset.
2156 * @param idxInstr The current instruction.
2157 */
2158DECL_INLINE_THROW(uint32_t)
2159iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2160{
2161#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2162 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
2163
2164 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
2165 {
2166#endif
2167 /*
2168 * Make sure we don't have any outstanding guest register writes as we may
2169 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2170 */
2171 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2172 off = iemNativeRegFlushPendingWrites(pReNative, off);
2173
2174#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2175 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2176#else
2177 RT_NOREF(idxInstr);
2178#endif
2179
2180 /* Allocate a temporary CR0 register. */
2181 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2182 kIemNativeGstRegUse_ReadOnly);
2183
2184 /*
2185 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
2186 * return raisexcpt();
2187 */
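    /* I.e. the x87/FPU instruction raises #NM whenever CR0.EM or CR0.TS is set. */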
2188 /* Test and jump. */
2189 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS,
2190 kIemNativeLabelType_RaiseNm);
2191
2192 /* Free but don't flush the CR0 register. */
2193 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2194
2195#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2196 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
2197 }
2198 else
2199 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
2200#endif
2201
2202 return off;
2203}
2204
2205
2206#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
2207 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2208
2209/**
2210 * Emits code to check if a \#NM exception should be raised.
2211 *
2212 * @returns New code buffer offset, UINT32_MAX on failure.
2213 * @param pReNative The native recompile state.
2214 * @param off The code buffer offset.
2215 * @param idxInstr The current instruction.
2216 */
2217DECL_INLINE_THROW(uint32_t)
2218iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2219{
2220#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2221 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2222
2223 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2224 {
2225#endif
2226 /*
2227 * Make sure we don't have any outstanding guest register writes as we may
2228 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2229 */
2230 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2231 off = iemNativeRegFlushPendingWrites(pReNative, off);
2232
2233#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2234 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2235#else
2236 RT_NOREF(idxInstr);
2237#endif
2238
2239 /* Allocate a temporary CR0 register. */
2240 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2241 kIemNativeGstRegUse_Calculation);
2242
2243 /*
2244 * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
2245 * return raisexcpt();
2246 */
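    /* I.e. FWAIT/WAIT only raises #NM when both CR0.MP and CR0.TS are set. */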
2247 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2248 /* Test and jump. */
2249 off = iemNativeEmitTestIfGpr32EqualsImmAndTbExit(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS,
2250 kIemNativeLabelType_RaiseNm);
2251
2252 /* Free the CR0 register. */
2253 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2254
2255#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2256 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2257 }
2258 else
2259 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2260#endif
2261
2262 return off;
2263}
2264
2265
2266#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2267 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2268
2269/**
2270 * Emits code to check if a \#MF exception should be raised.
2271 *
2272 * @returns New code buffer offset, UINT32_MAX on failure.
2273 * @param pReNative The native recompile state.
2274 * @param off The code buffer offset.
2275 * @param idxInstr The current instruction.
2276 */
2277DECL_INLINE_THROW(uint32_t)
2278iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2279{
2280 /*
2281 * Make sure we don't have any outstanding guest register writes as we may
2282 * raise an #MF and all guest registers must be up to date in CPUMCTX.
2283 */
2284 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2285 off = iemNativeRegFlushPendingWrites(pReNative, off);
2286
2287#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2288 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2289#else
2290 RT_NOREF(idxInstr);
2291#endif
2292
2293 /* Allocate a temporary FSW register. */
2294 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2295 kIemNativeGstRegUse_ReadOnly);
2296
2297 /*
2298 * if ((FSW & X86_FSW_ES) != 0)
2299 * return raisexcpt();
2300 */
2301 /* Test and jump. */
2302 off = iemNativeEmitTestBitInGprAndTbExitIfSet(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT, kIemNativeLabelType_RaiseMf);
2303
2304 /* Free but don't flush the FSW register. */
2305 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2306
2307 return off;
2308}
2309
2310
2311#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2312 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2313
2314/**
2315 * Emits code to check if a SSE exception (either \#UD or \#NM) should be raised.
2316 *
2317 * @returns New code buffer offset, UINT32_MAX on failure.
2318 * @param pReNative The native recompile state.
2319 * @param off The code buffer offset.
2320 * @param idxInstr The current instruction.
2321 */
2322DECL_INLINE_THROW(uint32_t)
2323iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2324{
2325#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2326 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2327
2328 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2329 {
2330#endif
2331 /*
2332 * Make sure we don't have any outstanding guest register writes as we may
2333 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2334 */
2335 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2336 off = iemNativeRegFlushPendingWrites(pReNative, off);
2337
2338#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2339 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2340#else
2341 RT_NOREF(idxInstr);
2342#endif
2343
2344 /* Allocate a temporary CR0 and CR4 register. */
2345 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2346 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2347 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2348
2349 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2350#ifdef RT_ARCH_AMD64
2351 /*
2352 * We do a modified test here:
2353 * if (!((((cr4 & X86_CR4_OSFXSR) | cr0) & (X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR)) ^ X86_CR4_OSFXSR)) { likely }
2354 * else { goto RaiseSseRelated; }
2355 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2356 * all targets except the 386, which doesn't support SSE anyway, so
2357 * this should be a safe assumption.
2358 */
2359 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
2360 //pCodeBuf[off++] = 0xcc;
2361 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2362 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2363 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2364 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2365 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2366 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseSseRelated, kIemNativeInstrCond_ne);
2367
2368#elif defined(RT_ARCH_ARM64)
2369 /*
2370 * We do a modified test here:
2371 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2372 * else { goto RaiseSseRelated; }
2373 */
2374 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
2375 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2376 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2377 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2378 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2379 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2380 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2381 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2382 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2383 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2384 kIemNativeLabelType_RaiseSseRelated);
2385
2386#else
2387# error "Port me!"
2388#endif
2389
2390 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2391 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2392 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2393 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2394
2395#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2396 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2397 }
2398 else
2399 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2400#endif
2401
2402 return off;
2403}
2404
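/*
 * A rough plain-C sketch of the check emitted above, for illustration only (the
 * helper name is invented for this sketch):
 *
 *     static bool iemExampleSseShouldRaise(uint32_t uCr0, uint32_t uCr4)
 *     {
 *         // Raise if x87/SSE emulation or task-switching is flagged, or the OS
 *         // hasn't set CR4.OSFXSR (OS support for FXSAVE/FXRSTOR and SSE).
 *         return (uCr0 & (X86_CR0_EM | X86_CR0_TS)) != 0
 *             || !(uCr4 & X86_CR4_OSFXSR);
 *     }
 *
 * The emitters above fold this into a single compare by merging the three bits
 * into one register: (((cr4 & OSFXSR) | cr0) & (EM | TS | OSFXSR)) ^ OSFXSR is
 * zero exactly when EM=0, TS=0 and OSFXSR=1, i.e. on the likely path.
 */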
2405
2406#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2407 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2408
2409/**
2410 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2411 *
2412 * @returns New code buffer offset, UINT32_MAX on failure.
2413 * @param pReNative The native recompile state.
2414 * @param off The code buffer offset.
2415 * @param idxInstr The current instruction.
2416 */
2417DECL_INLINE_THROW(uint32_t)
2418iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2419{
2420#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2421 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2422
2423 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2424 {
2425#endif
2426 /*
2427 * Make sure we don't have any outstanding guest register writes as we may
2428 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2429 */
2430 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2431 off = iemNativeRegFlushPendingWrites(pReNative, off);
2432
2433#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2434 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2435#else
2436 RT_NOREF(idxInstr);
2437#endif
2438
2439 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2440 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2441 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2442 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2443 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2444
2445 /*
2446 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2447 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2448 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2449 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2450 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2451 * { likely }
2452 * else { goto RaiseAvxRelated; }
2453 */
2454#ifdef RT_ARCH_AMD64
2455 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2456 | (((cr4 >> X86_CR4_OSXSAVE_BIT) & 1) << 1)
2457 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2458 ^ 0x1a) ) { likely }
2459 else { goto RaiseAvxRelated; } */
2460 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
2461 //pCodeBuf[off++] = 0xcc;
2462 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2463 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2464 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2465 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2466 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2467 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2468 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2469 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2470 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2471 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2472 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseAvxRelated, kIemNativeInstrCond_ne);
2473
2474#elif defined(RT_ARCH_ARM64)
2475 /* if (!( ((((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSXSAVE_BIT) & 1)) ^ 7) << 1)
2476 | ((cr0 >> X86_CR0_TS_BIT) & 1)) ) { likely }
2477 else { goto RaiseAvxRelated; } */
2478 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
2479 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2480 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2481 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2482 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2483 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2484 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2485 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2486 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2487 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2488 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2489 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2490 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2491 kIemNativeLabelType_RaiseAvxRelated);
2492
2493#else
2494# error "Port me!"
2495#endif
2496
2497 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2498 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2499 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2500 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2501#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2502 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2503 }
2504 else
2505 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2506#endif
2507
2508 return off;
2509}
2510
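/*
 * A worked example of the AMD64 bit packing above, for illustration only: with
 * XSAVE_C_SSE being bit 1 and XSAVE_C_YMM bit 2 of XCR0, the happy path (YMM and
 * SSE state enabled in XCR0, CR4.OSXSAVE=1, CR0.TS=0) accumulates
 *
 *     ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | (1 << 1) | 0  =  0x18 | 0x02  =  0x1a
 *
 * in the temporary register, which is exactly the constant XORed in afterwards,
 * so the result is zero and the RaiseAvxRelated exit is skipped.  Any disabled
 * XSAVE component or CR0.TS=1 leaves a bit set and takes the exit instead.
 */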
2511
2512#define IEM_MC_RAISE_DIVIDE_ERROR() \
2513 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
2514
2515/**
2516 * Emits code to raise a \#DE.
2517 *
2518 * @returns New code buffer offset, UINT32_MAX on failure.
2519 * @param pReNative The native recompile state.
2520 * @param off The code buffer offset.
2521 * @param idxInstr The current instruction.
2522 */
2523DECL_INLINE_THROW(uint32_t)
2524iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2525{
2526 /*
2527 * Make sure we don't have any outstanding guest register writes as we may raise a #DE.
2528 */
2529 off = iemNativeRegFlushPendingWrites(pReNative, off);
2530
2531#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2532 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2533#else
2534 RT_NOREF(idxInstr);
2535#endif
2536
2537 /* raise \#DE exception unconditionally. */
2538 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseDe);
2539}
2540
2541
2542#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2543 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2544
2545/**
2546 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2547 *
2548 * @returns New code buffer offset, UINT32_MAX on failure.
2549 * @param pReNative The native recompile state.
2550 * @param off The code buffer offset.
2551 * @param idxInstr The current instruction.
2552 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2553 * @param cbAlign The alignment in bytes to check against.
2554 */
2555DECL_INLINE_THROW(uint32_t)
2556iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2557 uint8_t idxVarEffAddr, uint8_t cbAlign)
2558{
2559 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2560 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2561
2562 /*
2563 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2564 */
2565 off = iemNativeRegFlushPendingWrites(pReNative, off);
2566
2567#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2568 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2569#else
2570 RT_NOREF(idxInstr);
2571#endif
2572
2573 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2574
2575 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxVarReg, cbAlign - 1,
2576 kIemNativeLabelType_RaiseGp0);
2577
2578 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2579 return off;
2580}
2581
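/*
 * A minimal sketch of the alignment test emitted above, assuming cbAlign is a
 * power of two (which is what the mask trick relies on); the helper name is made
 * up for this sketch:
 *
 *     static bool iemExampleIsMisaligned(uint64_t uEffAddr, uint8_t cbAlign)
 *     {
 *         return (uEffAddr & (cbAlign - 1U)) != 0; // any low bit set -> #GP(0)
 *     }
 *
 * E.g. cbAlign=16 gives the mask 0x0f, so uEffAddr=0x1008 takes the RaiseGp0
 * exit while uEffAddr=0x1010 does not.
 */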
2582
2583/*********************************************************************************************************************************
2584* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2585*********************************************************************************************************************************/
2586
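/*
 * A shorthand illustration (body statements elided) of how the macros in this
 * section cooperate; the exact MC statements are examples only:
 *
 *     IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF)
 *         ... if-body MC statements ...
 *     IEM_MC_ELSE()
 *         ... else-body MC statements ...
 *     IEM_MC_ENDIF();
 *
 * expands to roughly:
 *
 *     off = iemNativeEmitIfEflagsBitSet(pReNative, off, X86_EFL_ZF); do {
 *         ...
 *     } while (0); off = iemNativeEmitElse(pReNative, off); do {
 *         ...
 *     } while (0); off = iemNativeEmitEndIf(pReNative, off);
 *
 * The emitters push and pop the condition stack entry holding the else/endif
 * labels and the register state snapshots that get merged at the endif.
 */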
2587/**
2588 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2589 *
2590 * @returns Pointer to the condition stack entry.
2591 * @throws VERR_IEM_COND_TOO_DEEPLY_NESTED if nested too deeply.
2592 */
2593DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
2594{
2595 uint32_t const idxStack = pReNative->cCondDepth;
2596 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
2597
2598 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
2599 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
2600
2601 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
2602 pEntry->fInElse = false;
2603 pEntry->fIfExitTb = false;
2604 pEntry->fElseExitTb = false;
2605 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
2606 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
2607
2608 return pEntry;
2609}
2610
2611
2612/**
2613 * Start of the if-block, snapshotting the register and variable state.
2614 */
2615DECL_INLINE_THROW(void)
2616iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
2617{
2618 Assert(offIfBlock != UINT32_MAX);
2619 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2620 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2621 Assert(!pEntry->fInElse);
2622
2623 /* Define the start of the IF block if requested or for disassembly purposes. */
2624 if (idxLabelIf != UINT32_MAX)
2625 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
2626#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2627 else
2628 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
2629#else
2630 RT_NOREF(offIfBlock);
2631#endif
2632
2633 /* Copy the initial state so we can restore it in the 'else' block. */
2634 pEntry->InitialState = pReNative->Core;
2635}
2636
2637
2638#define IEM_MC_ELSE() } while (0); \
2639 off = iemNativeEmitElse(pReNative, off); \
2640 do {
2641
2642/** Emits code related to IEM_MC_ELSE. */
2643DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2644{
2645 /* Check sanity and get the conditional stack entry. */
2646 Assert(off != UINT32_MAX);
2647 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2648 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2649 Assert(!pEntry->fInElse);
2650
2651 /* We can skip the dirty register flushing and the jump to the endif if
2652 the if-branch already jumped to a TB exit. */
2653 if (!pEntry->fIfExitTb)
2654 {
2655#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) && 0
2656 /* Writeback any dirty shadow registers. */
2657 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2658 * in one of the branches and leave guest registers already dirty before the start of the if
2659 * block alone. */
2660 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2661#endif
2662
2663 /* Jump to the endif. */
2664 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
2665 }
2666# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2667 else
2668 Assert(pReNative->Core.offPc == 0);
2669# endif
2670
2671 /* Define the else label and enter the else part of the condition. */
2672 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2673 pEntry->fInElse = true;
2674
2675 /* Snapshot the core state so we can do a merge at the endif and restore
2676 the snapshot we took at the start of the if-block. */
2677 pEntry->IfFinalState = pReNative->Core;
2678 pReNative->Core = pEntry->InitialState;
2679
2680 return off;
2681}
2682
2683
2684#define IEM_MC_ENDIF() } while (0); \
2685 off = iemNativeEmitEndIf(pReNative, off)
2686
2687/** Emits code related to IEM_MC_ENDIF. */
2688DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2689{
2690 /* Check sanity and get the conditional stack entry. */
2691 Assert(off != UINT32_MAX);
2692 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2693 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2694
2695#if defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) && 0
2696 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2697#endif
2698
2699 /*
2700 * If either of the branches exited the TB, we can take the state from the
2701 * other branch and skip all the merging headache.
2702 */
2703 bool fDefinedLabels = false;
2704 if (pEntry->fElseExitTb || pEntry->fIfExitTb)
2705 {
2706#ifdef VBOX_STRICT
2707 Assert(pReNative->cCondDepth == 1); /* Assuming this only happens in simple conditional structures. */
2708 Assert(pEntry->fElseExitTb != pEntry->fIfExitTb); /* Assuming we don't have any code where both branches exit. */
2709 PCIEMNATIVECORESTATE const pExitCoreState = pEntry->fIfExitTb && pEntry->fInElse
2710 ? &pEntry->IfFinalState : &pReNative->Core;
2711# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2712 Assert(pExitCoreState->bmGstRegShadowDirty == 0);
2713# endif
2714# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2715 Assert(pExitCoreState->offPc == 0);
2716# endif
2717 RT_NOREF(pExitCoreState);
2718#endif
2719
2720 if (!pEntry->fIfExitTb)
2721 {
2722 Assert(pEntry->fInElse);
2723 pReNative->Core = pEntry->IfFinalState;
2724 }
2725 }
2726 else
2727 {
2728 /*
2729 * Now we have to find common ground with the core state at the end of the
2730 * if-block. Use the smallest common denominator and just drop anything
2731 * that isn't the same in both states.
2732 */
2733 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
2734 * which is why we're doing this at the end of the else-block.
2735 * But we'd need more info about future for that to be worth the effort. */
2736 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
2737#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2738 AssertMsgStmt(pReNative->Core.offPc == pOther->offPc,
2739 ("Core.offPc=%#RX64 pOther->offPc=%#RX64\n", pReNative->Core.offPc, pOther->offPc),
2740 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
2741#endif
2742
2743 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
2744 {
2745#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2746 /*
2747 * If the branches differ in dirty shadow registers, we flush the registers
2748 * that are only dirty in the current branch here and deal with those only
2749 * dirty in the other branch further down (fGstRegDirtyTail).
2750 */
2751 uint64_t const fGstRegDirtyOther = pOther->bmGstRegShadowDirty;
2752 uint64_t const fGstRegDirtyThis = pReNative->Core.bmGstRegShadowDirty;
2753 uint64_t const fGstRegDirtyDiff = fGstRegDirtyOther ^ fGstRegDirtyThis;
2754 uint64_t const fGstRegDirtyHead = fGstRegDirtyThis & fGstRegDirtyDiff;
2755 uint64_t fGstRegDirtyTail = fGstRegDirtyOther & fGstRegDirtyDiff;
2756 if (!fGstRegDirtyDiff)
2757 { /* likely */ }
2758 else
2759 {
2760 //uint64_t const fGstRegDirtyHead = pReNative->Core.bmGstRegShadowDirty & fGstRegDirtyDiff;
2761 if (fGstRegDirtyHead)
2762 {
2763 Log12(("iemNativeEmitEndIf: flushing dirty guest registers in current branch: %RX64\n", fGstRegDirtyHead));
2764 off = iemNativeRegFlushDirtyGuest(pReNative, off, fGstRegDirtyHead);
2765 }
2766 }
2767#endif
2768
2769 /*
2770 * Shadowed guest registers.
2771 *
2772 * We drop any shadows where the two states disagree about where
2773 * things are kept. We may end up flushing more dirty registers
2774 * here, if the two branches keep things in different registers.
2775 */
2776 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
2777 if (fGstRegs)
2778 {
2779 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
2780 do
2781 {
2782 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2783 fGstRegs &= ~RT_BIT_64(idxGstReg);
2784
2785 uint8_t const idxCurHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
2786 uint8_t const idxOtherHstReg = pOther->aidxGstRegShadows[idxGstReg];
2787 if ( idxCurHstReg != idxOtherHstReg
2788 || !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg)))
2789 {
2790#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2791 Log12(("iemNativeEmitEndIf: dropping gst %s (%d) from hst %s (other %d/%#RX64)\n",
2792 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
2793 idxOtherHstReg, pOther->bmGstRegShadows));
2794#else
2795 Log12(("iemNativeEmitEndIf: dropping %s gst %s (%d) from hst %s (other %d/%#RX64/%s)\n",
2796 pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "_dirty_" : "clean",
2797 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
2798 idxOtherHstReg, pOther->bmGstRegShadows,
2799 pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "dirty" : "clean"));
2800 if (pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
2801 fGstRegDirtyTail |= RT_BIT_64(idxGstReg);
2802 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
2803 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
2804#endif
2805 iemNativeRegClearGstRegShadowingOne(pReNative, idxCurHstReg, (IEMNATIVEGSTREG)idxGstReg, off);
2806 }
2807 } while (fGstRegs);
2808 }
2809 else
2810 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
2811
2812#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2813 /*
2814 * Generate jumpy code for flushing dirty registers from the other
2815 * branch that aren't dirty in the current one.
2816 */
2817 if (!fGstRegDirtyTail)
2818 { /* likely */ }
2819 else
2820 {
2821 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEndIfOtherBranchDirty);
2822 Log12(("iemNativeEmitEndIf: Dirty register only in the other branch: %#RX64 - BAD!\n", fGstRegDirtyTail));
2823
2824 /* First the current branch has to jump over the dirty flushing from the other branch. */
2825 uint32_t const offFixup1 = off;
2826 off = iemNativeEmitJmpToFixed(pReNative, off, off + 10);
2827
2828 /* Put the endif and maybe else label here so the other branch ends up here. */
2829 if (!pEntry->fInElse)
2830 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2831 else
2832 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
2833 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
2834 fDefinedLabels = true;
2835
2836 /* Flush the dirty guest registers from the other branch. */
2837 while (fGstRegDirtyTail)
2838 {
2839 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegDirtyTail) - 1;
2840 fGstRegDirtyTail &= ~RT_BIT_64(idxGstReg);
2841 Log12(("iemNativeEmitEndIf: tail flushing %s (%d) from other branch %d (cur %d/%#RX64)\n",
2842 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, pOther->aidxGstRegShadows[idxGstReg],
2843 pReNative->Core.aidxGstRegShadows[idxGstReg], pReNative->Core.bmGstRegShadows));
2844
2845 off = iemNativeRegFlushPendingWriteEx(pReNative, off, (PIEMNATIVECORESTATE)pOther, (IEMNATIVEGSTREG)idxGstReg);
2846
2847 /* Mismatching shadowing should've been dropped in the previous step already. */
2848 Assert( !(pReNative->Core.bmGstRegShadows & RT_BIT_64(idxGstReg))
2849 || pReNative->Core.aidxGstRegShadows[idxGstReg] == pOther->aidxGstRegShadows[idxGstReg]);
2850 }
2851
2852 /* Here is the actual endif label, fixup the above jump to land here. */
2853 iemNativeFixupFixedJump(pReNative, offFixup1, off);
2854 }
2855#endif
2856
2857 /*
2858 * Check variables next. For now we must require them to be identical
2859 * or stuff we can recreate. (No code is emitted here.)
2860 */
2861 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
2862#ifdef VBOX_STRICT
2863 uint32_t const offAssert = off;
2864#endif
2865 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
2866 if (fVars)
2867 {
2868 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
2869 do
2870 {
2871 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
2872 fVars &= ~RT_BIT_32(idxVar);
2873
2874 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
2875 {
2876 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
2877 continue;
2878 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2879 {
2880 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2881 if (idxHstReg != UINT8_MAX)
2882 {
2883 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2884 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2885 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
2886 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2887 }
2888 continue;
2889 }
2890 }
2891 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
2892 continue;
2893
2894 /* Irreconcilable, so drop it. */
2895 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2896 if (idxHstReg != UINT8_MAX)
2897 {
2898 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2899 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2900 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
2901 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2902 }
2903 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2904 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
2905 } while (fVars);
2906 }
2907 Assert(off == offAssert);
2908
2909 /*
2910 * Finally, check that the host register allocations match.
2911 */
2912 AssertMsgStmt((pReNative->Core.bmHstRegs & (pReNative->Core.bmHstRegs ^ pOther->bmHstRegs)) == 0,
2913 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
2914 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
2915 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
2916 }
2917 }
2918
2919 /*
2920 * Define the endif label and maybe the else one if we're still in the 'if' part.
2921 */
2922 if (!fDefinedLabels)
2923 {
2924 if (!pEntry->fInElse)
2925 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2926 else
2927 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
2928 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
2929 }
2930
2931 /* Pop the conditional stack. */
2932 pReNative->cCondDepth -= 1;
2933
2934 return off;
2935}
2936
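/*
 * Worked example of the dirty-mask bookkeeping in iemNativeEmitEndIf above
 * (the mask values are picked purely for illustration):
 *
 *     fGstRegDirtyThis  = 0x0005   dirty in the branch we are currently in
 *     fGstRegDirtyOther = 0x0006   dirty in the other branch
 *     fGstRegDirtyDiff  = 0x0003   dirty in exactly one of the two
 *     fGstRegDirtyHead  = 0x0001   flushed inline in the current branch
 *     fGstRegDirtyTail  = 0x0002   flushed in the code only the other branch runs
 *
 * The register behind bit 2 (0x0004) is dirty in both branches, stays out of the
 * diff and therefore remains dirty past the endif.
 */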
2937
2938#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
2939 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
2940 do {
2941
2942/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
2943DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2944{
2945 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2946 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
2947
2948 /* Get the eflags. */
2949 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2950 kIemNativeGstRegUse_ReadOnly);
2951
2952 /* Test and jump. */
2953 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2954
2955 /* Free but don't flush the EFlags register. */
2956 iemNativeRegFreeTmp(pReNative, idxEflReg);
2957
2958 /* Make a copy of the core state now as we start the if-block. */
2959 iemNativeCondStartIfBlock(pReNative, off);
2960
2961 return off;
2962}
2963
2964
2965#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
2966 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
2967 do {
2968
2969/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
2970DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2971{
2972 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2973 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
2974
2975 /* Get the eflags. */
2976 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2977 kIemNativeGstRegUse_ReadOnly);
2978
2979 /* Test and jump. */
2980 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2981
2982 /* Free but don't flush the EFlags register. */
2983 iemNativeRegFreeTmp(pReNative, idxEflReg);
2984
2985 /* Make a copy of the core state now as we start the if-block. */
2986 iemNativeCondStartIfBlock(pReNative, off);
2987
2988 return off;
2989}
2990
2991
2992#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
2993 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
2994 do {
2995
2996/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
2997DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2998{
2999 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3000 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3001
3002 /* Get the eflags. */
3003 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3004 kIemNativeGstRegUse_ReadOnly);
3005
3006 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3007 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3008
3009 /* Test and jump. */
3010 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3011
3012 /* Free but don't flush the EFlags register. */
3013 iemNativeRegFreeTmp(pReNative, idxEflReg);
3014
3015 /* Make a copy of the core state now as we start the if-block. */
3016 iemNativeCondStartIfBlock(pReNative, off);
3017
3018 return off;
3019}
3020
3021
3022#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
3023 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
3024 do {
3025
3026/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
3027DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
3028{
3029 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3030 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3031
3032 /* Get the eflags. */
3033 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3034 kIemNativeGstRegUse_ReadOnly);
3035
3036 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3037 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3038
3039 /* Test and jump. */
3040 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3041
3042 /* Free but don't flush the EFlags register. */
3043 iemNativeRegFreeTmp(pReNative, idxEflReg);
3044
3045 /* Make a copy of the core state now as we start the if-block. */
3046 iemNativeCondStartIfBlock(pReNative, off);
3047
3048 return off;
3049}
3050
3051
3052#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
3053 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
3054 do {
3055
3056#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
3057 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
3058 do {
3059
3060/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
3061DECL_INLINE_THROW(uint32_t)
3062iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3063 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
3064{
3065 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
3066 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3067
3068 /* Get the eflags. */
3069 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3070 kIemNativeGstRegUse_ReadOnly);
3071
3072 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
3073 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
3074
3075 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
3076 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
3077 Assert(iBitNo1 != iBitNo2);
3078
3079#ifdef RT_ARCH_AMD64
3080 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
3081
3082 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3083 if (iBitNo1 > iBitNo2)
3084 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3085 else
3086 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3087 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3088
3089#elif defined(RT_ARCH_ARM64)
3090 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3091 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3092
3093 /* and tmpreg, eflreg, #1<<iBitNo1 */
3094 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3095
3096 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3097 if (iBitNo1 > iBitNo2)
3098 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3099 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3100 else
3101 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3102 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3103
3104 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3105
3106#else
3107# error "Port me"
3108#endif
3109
3110 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3111 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3112 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3113
3114 /* Free but don't flush the EFlags and tmp registers. */
3115 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3116 iemNativeRegFreeTmp(pReNative, idxEflReg);
3117
3118 /* Make a copy of the core state now as we start the if-block. */
3119 iemNativeCondStartIfBlock(pReNative, off);
3120
3121 return off;
3122}
3123
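/*
 * A rough plain-C sketch of the two-bit comparison trick used above; the helper
 * name is made up for this sketch:
 *
 *     static bool iemExampleEflBitsDiffer(uint32_t fEfl, unsigned iBitNo1, unsigned iBitNo2)
 *     {
 *         uint32_t uTmp = fEfl & RT_BIT_32(iBitNo1);   // isolate bit #1
 *         uTmp = iBitNo1 > iBitNo2                     // align it with bit #2
 *              ? uTmp >> (iBitNo1 - iBitNo2)
 *              : uTmp << (iBitNo2 - iBitNo1);
 *         uTmp ^= fEfl;                                // bit iBitNo2 is now set iff the two bits differ
 *         return RT_BOOL(uTmp & RT_BIT_32(iBitNo2));
 *     }
 *
 * IEM_MC_IF_EFL_BITS_EQ enters the if-block when this returns false and
 * IEM_MC_IF_EFL_BITS_NE when it returns true.
 */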
3124
3125#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
3126 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
3127 do {
3128
3129#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
3130 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
3131 do {
3132
3133/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
3134 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
3135DECL_INLINE_THROW(uint32_t)
3136iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
3137 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
3138{
3139 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
3140 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3141
3142 /* We need an if-block label for the inverted variant. */
3143 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
3144 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
3145
3146 /* Get the eflags. */
3147 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3148 kIemNativeGstRegUse_ReadOnly);
3149
3150 /* Translate the flag masks to bit numbers. */
3151 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3152 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3153
3154 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
3155 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
3156 Assert(iBitNo1 != iBitNo);
3157
3158 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
3159 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
3160 Assert(iBitNo2 != iBitNo);
3161 Assert(iBitNo2 != iBitNo1);
3162
3163#ifdef RT_ARCH_AMD64
3164 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
3165#elif defined(RT_ARCH_ARM64)
3166 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3167#endif
3168
3169 /* Check for the lone bit first. */
3170 if (!fInverted)
3171 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3172 else
3173 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
3174
3175 /* Then extract and compare the other two bits. */
3176#ifdef RT_ARCH_AMD64
3177 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3178 if (iBitNo1 > iBitNo2)
3179 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3180 else
3181 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3182 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3183
3184#elif defined(RT_ARCH_ARM64)
3185 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3186
3187 /* and tmpreg, eflreg, #1<<iBitNo1 */
3188 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3189
3190 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3191 if (iBitNo1 > iBitNo2)
3192 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3193 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3194 else
3195 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3196 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3197
3198 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3199
3200#else
3201# error "Port me"
3202#endif
3203
3204 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3205 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3206 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3207
3208 /* Free but don't flush the EFlags and tmp registers. */
3209 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3210 iemNativeRegFreeTmp(pReNative, idxEflReg);
3211
3212 /* Make a copy of the core state now as we start the if-block. */
3213 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
3214
3215 return off;
3216}
3217
3218
3219#define IEM_MC_IF_CX_IS_NZ() \
3220 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
3221 do {
3222
3223/** Emits code for IEM_MC_IF_CX_IS_NZ. */
3224DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3225{
3226 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3227
3228 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3229 kIemNativeGstRegUse_ReadOnly);
3230 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
3231 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3232
3233 iemNativeCondStartIfBlock(pReNative, off);
3234 return off;
3235}
3236
3237
3238#define IEM_MC_IF_ECX_IS_NZ() \
3239 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
3240 do {
3241
3242#define IEM_MC_IF_RCX_IS_NZ() \
3243 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
3244 do {
3245
3246/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
3247DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3248{
3249 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3250
3251 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3252 kIemNativeGstRegUse_ReadOnly);
3253 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
3254 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3255
3256 iemNativeCondStartIfBlock(pReNative, off);
3257 return off;
3258}
3259
3260
3261#define IEM_MC_IF_CX_IS_NOT_ONE() \
3262 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
3263 do {
3264
3265/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
3266DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3267{
3268 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3269
3270 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3271 kIemNativeGstRegUse_ReadOnly);
3272#ifdef RT_ARCH_AMD64
3273 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3274#else
3275 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3276 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3277 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3278#endif
3279 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3280
3281 iemNativeCondStartIfBlock(pReNative, off);
3282 return off;
3283}
3284
3285
3286#define IEM_MC_IF_ECX_IS_NOT_ONE() \
3287 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
3288 do {
3289
3290#define IEM_MC_IF_RCX_IS_NOT_ONE() \
3291 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
3292 do {
3293
3294/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
3295DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3296{
3297 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3298
3299 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3300 kIemNativeGstRegUse_ReadOnly);
3301 if (f64Bit)
3302 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3303 else
3304 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3305 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3306
3307 iemNativeCondStartIfBlock(pReNative, off);
3308 return off;
3309}
3310
3311
3312#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3313 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
3314 do {
3315
3316#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3317 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
3318 do {
3319
3320/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3321 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3322DECL_INLINE_THROW(uint32_t)
3323iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
3324{
3325 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3326 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3327
3328 /* We have to load both RCX and EFLAGS before we can start branching,
3329 otherwise we'll end up in the else-block with an inconsistent
3330 register allocator state.
3331 Doing EFLAGS first as it's more likely to be loaded, right? */
3332 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3333 kIemNativeGstRegUse_ReadOnly);
3334 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3335 kIemNativeGstRegUse_ReadOnly);
3336
3337 /** @todo we could reduce this to a single branch instruction by spending a
3338 * temporary register and some setnz stuff. Not sure if loops are
3339 * worth it. */
3340 /* Check CX. */
3341#ifdef RT_ARCH_AMD64
3342 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3343#else
3344 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3345 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3346 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3347#endif
3348
3349 /* Check the EFlags bit. */
3350 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3351 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3352 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3353 !fCheckIfSet /*fJmpIfSet*/);
3354
3355 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3356 iemNativeRegFreeTmp(pReNative, idxEflReg);
3357
3358 iemNativeCondStartIfBlock(pReNative, off);
3359 return off;
3360}
3361
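/*
 * The combined condition emitted above, as a plain-C sketch (illustrative only):
 * the if-block is entered when
 *
 *     (uCx != 1) && ((fEfl & fBitInEfl) != 0)      for the _EFL_BIT_SET variant
 *     (uCx != 1) && ((fEfl & fBitInEfl) == 0)      for the _EFL_BIT_NOT_SET variant
 *
 * where uCx is the 16-bit CX value here (ECX/RCX in the variants below).  Either
 * failing sub-test jumps straight to the else label, which is why both registers
 * must be loaded before the first branch.
 */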
3362
3363#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3364 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
3365 do {
3366
3367#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3368 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
3369 do {
3370
3371#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3372 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
3373 do {
3374
3375#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3376 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
3377 do {
3378
3379/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3380 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3381 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3382 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3383DECL_INLINE_THROW(uint32_t)
3384iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3385 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
3386{
3387 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3388 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3389
3390 /* We have to load both RCX and EFLAGS before we can start branching,
3391 otherwise we'll end up in the else-block with an inconsistent
3392 register allocator state.
3393 Doing EFLAGS first as it's more likely to be loaded, right? */
3394 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3395 kIemNativeGstRegUse_ReadOnly);
3396 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3397 kIemNativeGstRegUse_ReadOnly);
3398
3399 /** @todo we could reduce this to a single branch instruction by spending a
3400 * temporary register and some setnz stuff. Not sure if loops are
3401 * worth it. */
3402 /* Check RCX/ECX. */
3403 if (f64Bit)
3404 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3405 else
3406 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3407
3408 /* Check the EFlags bit. */
3409 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3410 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3411 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3412 !fCheckIfSet /*fJmpIfSet*/);
3413
3414 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3415 iemNativeRegFreeTmp(pReNative, idxEflReg);
3416
3417 iemNativeCondStartIfBlock(pReNative, off);
3418 return off;
3419}
3420
3421
3422#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3423 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3424 do {
3425
3426/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3427DECL_INLINE_THROW(uint32_t)
3428iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3429{
3430 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3431
3432 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3433 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3434 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3435 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3436
3437 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3438
3439 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3440
3441 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3442
3443 iemNativeCondStartIfBlock(pReNative, off);
3444 return off;
3445}
3446
3447
3448#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3449 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3450 do {
3451
3452/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3453DECL_INLINE_THROW(uint32_t)
3454iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3455{
3456 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3457 Assert(iGReg < 16);
3458
3459 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3460 kIemNativeGstRegUse_ReadOnly);
3461
3462 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3463
3464 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3465
3466 iemNativeCondStartIfBlock(pReNative, off);
3467 return off;
3468}
3469
3470
3471
3472/*********************************************************************************************************************************
3473* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3474*********************************************************************************************************************************/
3475
3476#define IEM_MC_NOREF(a_Name) \
3477 RT_NOREF_PV(a_Name)
3478
3479#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3480 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3481
3482#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3483 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3484
3485#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3486 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3487
3488#define IEM_MC_LOCAL(a_Type, a_Name) \
3489 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3490
3491#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3492 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3493
3494#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3495 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3496
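/*
 * A small illustration of what the declaration macros above turn into inside a
 * recompiled MC block; the types, names and values are examples only:
 *
 *     IEM_MC_ARG(uint16_t, u16Port, 1);
 *     IEM_MC_LOCAL(uint32_t, u32Value);
 *     IEM_MC_LOCAL_CONST(uint8_t, cShift, 1);
 *
 * becomes, under the definitions above,
 *
 *     uint8_t const u16Port  = iemNativeArgAlloc(pReNative, 1, sizeof(uint16_t));
 *     uint8_t const u32Value = iemNativeVarAlloc(pReNative, sizeof(uint32_t));
 *     uint8_t const cShift   = iemNativeVarAllocConst(pReNative, sizeof(uint8_t), 1);
 *
 * i.e. each MC 'variable' is an index into the recompiler's variable table
 * rather than a host C variable of the declared type.
 */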
3497
3498/**
3499 * Sets the host register for @a idxVarRc to @a idxReg.
3500 *
3501 * The register must not be allocated. Any guest register shadowing will be
3502 * implictly dropped by this call.
3503 *
3504 * The variable must not have any register associated with it (causes
3505 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3506 * implied.
3507 *
3508 * @returns idxReg
3509 * @param pReNative The recompiler state.
3510 * @param idxVar The variable.
3511 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3512 * @param off For recording in debug info.
3513 *
3514 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3515 */
3516DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
3517{
3518 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3519 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3520 Assert(!pVar->fRegAcquired);
3521 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3522 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3523 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3524
3525 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3526 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3527
3528 iemNativeVarSetKindToStack(pReNative, idxVar);
3529 pVar->idxReg = idxReg;
3530
3531 return idxReg;
3532}
3533
3534
3535/**
3536 * Convenience wrapper around iemNativeVarRegisterSet that also marks the register as acquired.
3537 */
3538DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3539 uint8_t idxReg, uint32_t *poff)
3540{
3541 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
3542 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3543 return idxReg;
3544}
3545
3546
3547/**
3548 * This is called by IEM_MC_END() to clean up all variables.
3549 */
3550DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3551{
3552 uint32_t const bmVars = pReNative->Core.bmVars;
3553 if (bmVars != 0)
3554 iemNativeVarFreeAllSlow(pReNative, bmVars);
3555 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3556 Assert(pReNative->Core.bmStack == 0);
3557}
3558
3559
3560#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
3561
3562/**
3563 * This is called by IEM_MC_FREE_LOCAL.
3564 */
3565DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3566{
3567 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3568 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
3569 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3570}
3571
3572
3573#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
3574
3575/**
3576 * This is called by IEM_MC_FREE_ARG.
3577 */
3578DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3579{
3580 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3581 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
3582 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3583}
3584
3585
3586#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
3587
3588/**
3589 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
3590 */
3591DECL_INLINE_THROW(uint32_t)
3592iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
3593{
3594 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
3595 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
3596 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3597 Assert( pVarDst->cbVar == sizeof(uint16_t)
3598 || pVarDst->cbVar == sizeof(uint32_t));
3599
3600 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
3601 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
3602 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
3603 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
3604 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3605
3606 Assert(pVarDst->cbVar < pVarSrc->cbVar);
3607
3608 /*
3609 * Special case for immediates.
3610 */
3611 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
3612 {
3613 switch (pVarDst->cbVar)
3614 {
3615 case sizeof(uint16_t):
3616 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
3617 break;
3618 case sizeof(uint32_t):
3619 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
3620 break;
3621 default: AssertFailed(); break;
3622 }
3623 }
3624 else
3625 {
3626 /*
3627 * The generic solution for now.
3628 */
3629 /** @todo optimize this by having the python script make sure the source
3630 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
3631 * statement. Then we could just transfer the register assignments. */
3632 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
3633 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
3634 switch (pVarDst->cbVar)
3635 {
3636 case sizeof(uint16_t):
3637 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
3638 break;
3639 case sizeof(uint32_t):
3640 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
3641 break;
3642 default: AssertFailed(); break;
3643 }
3644 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
3645 iemNativeVarRegisterRelease(pReNative, idxVarDst);
3646 }
3647 return off;
3648}
3649
3650
3651
3652/*********************************************************************************************************************************
3653* Emitters for IEM_MC_CALL_CIMPL_XXX *
3654*********************************************************************************************************************************/
3655
3656/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
3657DECL_INLINE_THROW(uint32_t)
3658iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
3659 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
3660
3661{
3662 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
3663
3664#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3665 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
3666 when a call clobbers any of the relevant control registers. */
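    /* Descriptive note (mirrors the branches below): flushing XCR0 only invalidates the
       emitted AVX usability check, flushing CR4 invalidates both the SSE and AVX checks,
       and the remaining case (CR0) additionally invalidates the device-not-available
       check. */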
3667# if 1
3668 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
3669 {
3670 /* Likely as long as call+ret are done via cimpl. */
3671 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
3672 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
3673 }
3674 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
3675 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3676 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
3677 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3678 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
3679 else
3680 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3681 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3682 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3683
3684# else
3685 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
3686 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3687 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
3688 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3689 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
3690 || pfnCImpl == (uintptr_t)iemCImpl_callf
3691 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
3692 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
3693 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3694 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3695 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3696# endif
3697
3698# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3699 /* Mark the host floating point control register as not synced if MXCSR is modified. */
3700 if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_MxCsr))
3701 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
3702# endif
3703#endif
3704
3705 /*
3706 * Do all the call setup and cleanup.
3707 */
3708 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
3709
3710 /*
3711 * Load the two or three hidden arguments.
3712 */
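    /* Effectively the helper is invoked as pfnCImpl(pVCpu, cbInstr, <user args>), except
       on Windows/AMD64 strict-rc builds where VBOXSTRICTRC is returned via a hidden
       buffer and the call becomes pfnCImpl(&rcStrict, pVCpu, cbInstr, <user args>). */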
3713#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3714 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3715 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3716 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
3717#else
3718 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3719 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
3720#endif
3721
3722 /*
3723 * Make the call and check the return code.
3724 *
3725 * Shadow PC copies are always flushed here; other state depends on the flags.
3726 * Segment and general purpose registers are explicitly flushed via the
3727 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
3728 * macros.
3729 */
3730 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
3731#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3732 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3733#endif
3734 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
3735 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
3736 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
3737 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
3738
3739#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
3740 pReNative->Core.fDebugPcInitialized = false;
3741 Log4(("fDebugPcInitialized=false cimpl off=%#x (v1)\n", off));
3742#endif
3743
3744 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3745}
3746
3747
3748#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
3749 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
3750
3751/** Emits code for IEM_MC_CALL_CIMPL_1. */
3752DECL_INLINE_THROW(uint32_t)
3753iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3754 uintptr_t pfnCImpl, uint8_t idxArg0)
3755{
3756 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3757 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
3758}
3759
3760
3761#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
3762 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
3763
3764/** Emits code for IEM_MC_CALL_CIMPL_2. */
3765DECL_INLINE_THROW(uint32_t)
3766iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3767 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
3768{
3769 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3770 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3771 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
3772}
3773
3774
3775#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
3776 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3777 (uintptr_t)a_pfnCImpl, a0, a1, a2)
3778
3779/** Emits code for IEM_MC_CALL_CIMPL_3. */
3780DECL_INLINE_THROW(uint32_t)
3781iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3782 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3783{
3784 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3785 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3786 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3787 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
3788}
3789
3790
3791#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
3792 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3793 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
3794
3795/** Emits code for IEM_MC_CALL_CIMPL_4. */
3796DECL_INLINE_THROW(uint32_t)
3797iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3798 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3799{
3800 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3801 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3802 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3803 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3804 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
3805}
3806
3807
3808#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
3809 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3810 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
3811
3812/** Emits code for IEM_MC_CALL_CIMPL_5. */
3813DECL_INLINE_THROW(uint32_t)
3814iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3815 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
3816{
3817 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3818 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3819 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3820 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3821 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
3822 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
3823}
3824
3825
3826/** Recompiler debugging: Flush guest register shadow copies. */
3827#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
3828
3829
3830
3831/*********************************************************************************************************************************
3832* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
3833*********************************************************************************************************************************/
3834
3835/**
3836 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
3837 */
3838DECL_INLINE_THROW(uint32_t)
3839iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3840 uintptr_t pfnAImpl, uint8_t cArgs)
3841{
3842 if (idxVarRc != UINT8_MAX)
3843 {
3844 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
3845 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
3846 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3847 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3848 }
3849
3850 /*
3851 * Do all the call setup and cleanup.
3852 *
3853 * Only pending guest register writes held in call volatile registers need to be
3854 * flushed, as assembly helpers can't throw and don't access anything living in
3855 * CPUMCTX; they only access their parameters. Call volatile registers are always
3856 * flushed by iemNativeEmitCallCommon() regardless of the fFlushPendingWrites parameter.
3857 */
3858 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
3859
3860 /*
3861 * Make the call and update the return code variable if we've got one.
3862 */
3863 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
3864 if (idxVarRc != UINT8_MAX)
3865 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
3866
3867 return off;
3868}
3869
3870
3871
3872#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
3873 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
3874
3875#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
3876 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
3877
3878/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
3879DECL_INLINE_THROW(uint32_t)
3880iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
3881{
3882 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
3883}
3884
3885
3886#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
3887 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
3888
3889#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
3890 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
3891
3892/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
3893DECL_INLINE_THROW(uint32_t)
3894iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
3895{
3896 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3897 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
3898}
3899
3900
3901#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
3902 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
3903
3904#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
3905 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
3906
3907/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
3908DECL_INLINE_THROW(uint32_t)
3909iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3910 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
3911{
3912 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3913 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3914 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
3915}
3916
3917
3918#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
3919 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
3920
3921#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
3922 IEM_MC_LOCAL(a_rcType, a_rc); \
3923 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
3924
3925/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
3926DECL_INLINE_THROW(uint32_t)
3927iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3928 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3929{
3930 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3931 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3932 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3933 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
3934}
3935
3936
3937#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
3938 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3939
3940#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
3941 IEM_MC_LOCAL(a_rcType, a_rc); \
3942 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3943
3944/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
3945DECL_INLINE_THROW(uint32_t)
3946iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3947 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3948{
3949 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3950 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3951 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3952 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
3953 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
3954}
3955
3956
3957
3958/*********************************************************************************************************************************
3959* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
3960*********************************************************************************************************************************/
3961
3962#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
3963 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
3964
3965#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3966 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
3967
3968#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3969 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
3970
3971#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3972 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
3973
3974
3975/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
3976 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
3977DECL_INLINE_THROW(uint32_t)
3978iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
3979{
3980 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3981 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3982 Assert(iGRegEx < 20);
3983
3984 /* Same discussion as in iemNativeEmitFetchGregU16 */
3985 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3986 kIemNativeGstRegUse_ReadOnly);
3987
3988 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3989 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3990
3991 /* The value is zero-extended to the full 64-bit host register width. */
3992 if (iGRegEx < 16)
3993 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3994 else
3995 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3996
3997 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3998 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3999 return off;
4000}
4001
4002
4003#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4004 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
4005
4006#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4007 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
4008
4009#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4010 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
4011
4012/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
4013DECL_INLINE_THROW(uint32_t)
4014iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
4015{
4016 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4017 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4018 Assert(iGRegEx < 20);
4019
4020 /* Same discussion as in iemNativeEmitFetchGregU16 */
4021 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4022 kIemNativeGstRegUse_ReadOnly);
4023
4024 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4025 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4026
4027 if (iGRegEx < 16)
4028 {
4029 switch (cbSignExtended)
4030 {
4031 case sizeof(uint16_t):
4032 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4033 break;
4034 case sizeof(uint32_t):
4035 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4036 break;
4037 case sizeof(uint64_t):
4038 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4039 break;
4040 default: AssertFailed(); break;
4041 }
4042 }
4043 else
4044 {
4045 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4046 switch (cbSignExtended)
4047 {
4048 case sizeof(uint16_t):
4049 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4050 break;
4051 case sizeof(uint32_t):
4052 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4053 break;
4054 case sizeof(uint64_t):
4055 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4056 break;
4057 default: AssertFailed(); break;
4058 }
4059 }
4060
4061 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4062 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4063 return off;
4064}
4065
4066
4067
4068#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
4069 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
4070
4071#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
4072 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4073
4074#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
4075 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4076
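/* Usage sketch (illustrative only, not taken from an actual instruction body):
 *      IEM_MC_LOCAL(uint16_t, u16Value);
 *      IEM_MC_FETCH_GREG_U16(u16Value, X86_GREG_xCX);
 * The threaded/native recompiler expands such statements into calls to the emitter below. */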
4077/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
4078DECL_INLINE_THROW(uint32_t)
4079iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4080{
4081 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4082 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4083 Assert(iGReg < 16);
4084
4085 /*
4086 * We can either just load the low 16-bit of the GPR into a host register
4087 * for the variable, or we can do so via a shadow copy host register. The
4088 * latter will avoid having to reload it if it's being stored later, but
4089 * will waste a host register if it isn't touched again. Since we don't
4090 * know what's going to happen, we choose the latter for now.
4091 */
4092 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4093 kIemNativeGstRegUse_ReadOnly);
4094
4095 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4096 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4097 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4098 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4099
4100 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4101 return off;
4102}
4103
4104#define IEM_MC_FETCH_GREG_I16(a_i16Dst, a_iGReg) \
4105 off = iemNativeEmitFetchGregI16(pReNative, off, a_i16Dst, a_iGReg)
4106
4107/** Emits code for IEM_MC_FETCH_GREG_I16. */
4108DECL_INLINE_THROW(uint32_t)
4109iemNativeEmitFetchGregI16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4110{
4111 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4112 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(int16_t));
4113 Assert(iGReg < 16);
4114
4115 /*
4116 * We can either just load the low 16-bit of the GPR into a host register
4117 * for the variable, or we can do so via a shadow copy host register. The
4118 * latter will avoid having to reload it if it's being stored later, but
4119 * will waste a host register if it isn't touched again. Since we don't
4120 * know what's going to happen, we choose the latter for now.
4121 */
4122 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4123 kIemNativeGstRegUse_ReadOnly);
4124
4125 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4126 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4127#ifdef RT_ARCH_AMD64
4128 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4129#elif defined(RT_ARCH_ARM64) /* Note! There are no 16-bit registers on ARM, we emulate that through 32-bit registers which requires sign extension. */
4130 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4131#endif
4132 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4133
4134 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4135 return off;
4136}
4137
4138
4139#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
4140 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4141
4142#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
4143 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4144
4145/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
4146DECL_INLINE_THROW(uint32_t)
4147iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
4148{
4149 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4150 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4151 Assert(iGReg < 16);
4152
4153 /*
4154 * We can either just load the low 16-bit of the GPR into a host register
4155 * for the variable, or we can do so via a shadow copy host register. The
4156 * latter will avoid having to reload it if it's being stored later, but
4157 * will waste a host register if it isn't touched again. Since we don't
4158 * know what's going to happen, we choose the latter for now.
4159 */
4160 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4161 kIemNativeGstRegUse_ReadOnly);
4162
4163 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4164 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4165 if (cbSignExtended == sizeof(uint32_t))
4166 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4167 else
4168 {
4169 Assert(cbSignExtended == sizeof(uint64_t));
4170 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4171 }
4172 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4173
4174 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4175 return off;
4176}
4177
4178
4179#define IEM_MC_FETCH_GREG_I32(a_i32Dst, a_iGReg) \
4180 off = iemNativeEmitFetchGregU32(pReNative, off, a_i32Dst, a_iGReg, sizeof(uint32_t))
4181
4182#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
4183 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
4184
4185#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
4186 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
4187
4188/** Emits code for IEM_MC_FETCH_GREG_U32, IEM_MC_FETCH_GREG_I32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
4189DECL_INLINE_THROW(uint32_t)
4190iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4191{
4192 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4193 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4194 Assert(iGReg < 16);
4195
4196 /*
4197 * We can either just load the low 32-bit of the GPR into a host register
4198 * for the variable, or we can do so via a shadow copy host register. The
4199 * latter will avoid having to reload it if it's being stored later, but
4200 * will waste a host register if it isn't touched again. Since we don't
4201 * know what's going to happen, we choose the latter for now.
4202 */
4203 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4204 kIemNativeGstRegUse_ReadOnly);
4205
4206 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4207 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4208 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4209 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4210
4211 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4212 return off;
4213}
4214
4215
4216#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
4217 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
4218
4219/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
4220DECL_INLINE_THROW(uint32_t)
4221iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4222{
4223 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4224 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4225 Assert(iGReg < 16);
4226
4227 /*
4228 * We can either just load the low 32-bit of the GPR into a host register
4229 * for the variable, or we can do so via a shadow copy host register. The
4230 * latter will avoid having to reload it if it's being stored later, but
4231 * will waste a host register if it isn't touched again. Since we don't
4232 * know what's going to happen, we choose the latter for now.
4233 */
4234 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4235 kIemNativeGstRegUse_ReadOnly);
4236
4237 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4238 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4239 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4240 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4241
4242 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4243 return off;
4244}
4245
4246
4247#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
4248 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4249
4250#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
4251 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4252
4253/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
4254 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
4255DECL_INLINE_THROW(uint32_t)
4256iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4257{
4258 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4259 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4260 Assert(iGReg < 16);
4261
4262 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4263 kIemNativeGstRegUse_ReadOnly);
4264
4265 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4266 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4267 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
4268 /** @todo name the register a shadow one already? */
4269 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4270
4271 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4272 return off;
4273}
4274
4275
4276#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4277#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
4278 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
4279
4280/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
4281DECL_INLINE_THROW(uint32_t)
4282iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
4283{
4284 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4285 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4286 Assert(iGRegLo < 16 && iGRegHi < 16);
4287
4288 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4289 kIemNativeGstRegUse_ReadOnly);
4290 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4291 kIemNativeGstRegUse_ReadOnly);
4292
4293 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4294 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
4295 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
4296 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
4297
4298 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4299 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4300 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4301 return off;
4302}
4303#endif
4304
4305
4306/*********************************************************************************************************************************
4307* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
4308*********************************************************************************************************************************/
4309
4310#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
4311 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
4312
4313/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
4314DECL_INLINE_THROW(uint32_t)
4315iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
4316{
4317 Assert(iGRegEx < 20);
4318 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4319 kIemNativeGstRegUse_ForUpdate);
4320#ifdef RT_ARCH_AMD64
4321 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4322
4323 /* To the lowest byte of the register: mov r8, imm8 */
4324 if (iGRegEx < 16)
4325 {
4326 if (idxGstTmpReg >= 8)
4327 pbCodeBuf[off++] = X86_OP_REX_B;
4328 else if (idxGstTmpReg >= 4)
4329 pbCodeBuf[off++] = X86_OP_REX;
4330 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4331 pbCodeBuf[off++] = u8Value;
4332 }
4333 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
4334 else if (idxGstTmpReg < 4)
4335 {
4336 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4337 pbCodeBuf[off++] = u8Value;
4338 }
4339 else
4340 {
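        /* Only the first four host registers have addressable high-byte forms (AH/CH/DH/BH),
           and those cannot be combined with a REX prefix; for any other shadow register we
           rotate bits 15:8 down to the low byte, store the immediate there, and rotate back. */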
4341 /* ror reg64, 8 */
4342 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4343 pbCodeBuf[off++] = 0xc1;
4344 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4345 pbCodeBuf[off++] = 8;
4346
4347 /* mov reg8, imm8 */
4348 if (idxGstTmpReg >= 8)
4349 pbCodeBuf[off++] = X86_OP_REX_B;
4350 else if (idxGstTmpReg >= 4)
4351 pbCodeBuf[off++] = X86_OP_REX;
4352 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4353 pbCodeBuf[off++] = u8Value;
4354
4355 /* rol reg64, 8 */
4356 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4357 pbCodeBuf[off++] = 0xc1;
4358 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4359 pbCodeBuf[off++] = 8;
4360 }
4361
4362#elif defined(RT_ARCH_ARM64)
4363 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4364 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4365 if (iGRegEx < 16)
4366 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4367 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4368 else
4369 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4370 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4371 iemNativeRegFreeTmp(pReNative, idxImmReg);
4372
4373#else
4374# error "Port me!"
4375#endif
4376
4377 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4378
4379#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4380 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4381#endif
4382
4383 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4384 return off;
4385}
4386
4387
4388#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4389 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4390
4391/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4392DECL_INLINE_THROW(uint32_t)
4393iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4394{
4395 Assert(iGRegEx < 20);
4396 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4397
4398 /*
4399 * If it's a constant value (unlikely) we treat this as an
4400 * IEM_MC_STORE_GREG_U8_CONST statement.
4401 */
4402 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4403 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4404 { /* likely */ }
4405 else
4406 {
4407 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4408 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4409 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4410 }
4411
4412 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4413 kIemNativeGstRegUse_ForUpdate);
4414 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4415
4416#ifdef RT_ARCH_AMD64
4417 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4418 if (iGRegEx < 16)
4419 {
4420 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4421 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4422 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4423 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4424 pbCodeBuf[off++] = X86_OP_REX;
4425 pbCodeBuf[off++] = 0x8a;
4426 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4427 }
4428 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
4429 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4430 {
4431 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4432 pbCodeBuf[off++] = 0x8a;
4433 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4434 }
4435 else
4436 {
4437 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4438
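        /* Same ror/mov/rol trick as in iemNativeEmitStoreGregU8Const: bits 15:8 of this
           host register cannot be addressed directly, so rotate them into the low byte,
           do the 8-bit register move, and rotate back. */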
4439 /* ror reg64, 8 */
4440 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4441 pbCodeBuf[off++] = 0xc1;
4442 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4443 pbCodeBuf[off++] = 8;
4444
4445 /* mov reg8, reg8(r/m) */
4446 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4447 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4448 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4449 pbCodeBuf[off++] = X86_OP_REX;
4450 pbCodeBuf[off++] = 0x8a;
4451 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4452
4453 /* rol reg64, 8 */
4454 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4455 pbCodeBuf[off++] = 0xc1;
4456 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4457 pbCodeBuf[off++] = 8;
4458 }
4459
4460#elif defined(RT_ARCH_ARM64)
4461 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4462 or
4463 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4464 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4465 if (iGRegEx < 16)
4466 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4467 else
4468 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4469
4470#else
4471# error "Port me!"
4472#endif
4473 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4474
4475 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4476
4477#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4478 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4479#endif
4480 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4481 return off;
4482}
4483
4484
4485
4486#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4487 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4488
4489/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4490DECL_INLINE_THROW(uint32_t)
4491iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4492{
4493 Assert(iGReg < 16);
4494 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4495 kIemNativeGstRegUse_ForUpdate);
4496#ifdef RT_ARCH_AMD64
4497 /* mov reg16, imm16 */
4498 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4499 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4500 if (idxGstTmpReg >= 8)
4501 pbCodeBuf[off++] = X86_OP_REX_B;
4502 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4503 pbCodeBuf[off++] = RT_BYTE1(uValue);
4504 pbCodeBuf[off++] = RT_BYTE2(uValue);
4505
4506#elif defined(RT_ARCH_ARM64)
4507 /* movk xdst, #uValue, lsl #0 */
4508 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4509 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4510
4511#else
4512# error "Port me!"
4513#endif
4514
4515 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4516
4517#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4518 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4519#endif
4520 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4521 return off;
4522}
4523
4524
4525#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4526 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4527
4528/** Emits code for IEM_MC_STORE_GREG_U16. */
4529DECL_INLINE_THROW(uint32_t)
4530iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4531{
4532 Assert(iGReg < 16);
4533 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4534
4535 /*
4536 * If it's a constant value (unlikely) we treat this as an
4537 * IEM_MC_STORE_GREG_U16_CONST statement.
4538 */
4539 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4540 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4541 { /* likely */ }
4542 else
4543 {
4544 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4545 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4546 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4547 }
4548
4549 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4550 kIemNativeGstRegUse_ForUpdate);
4551
4552#ifdef RT_ARCH_AMD64
4553 /* mov reg16, reg16 or [mem16] */
4554 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4555 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4556 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4557 {
4558 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4559 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
4560 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
4561 pbCodeBuf[off++] = 0x8b;
4562 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
4563 }
4564 else
4565 {
4566 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
4567 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
4568 if (idxGstTmpReg >= 8)
4569 pbCodeBuf[off++] = X86_OP_REX_R;
4570 pbCodeBuf[off++] = 0x8b;
4571 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
4572 }
4573
4574#elif defined(RT_ARCH_ARM64)
4575 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
4576 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4577 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4578 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
4579 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4580
4581#else
4582# error "Port me!"
4583#endif
4584
4585 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4586
4587#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4588 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4589#endif
4590 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4591 return off;
4592}
4593
4594
4595#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
4596 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
4597
4598/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
4599DECL_INLINE_THROW(uint32_t)
4600iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
4601{
4602 Assert(iGReg < 16);
4603 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4604 kIemNativeGstRegUse_ForFullWrite);
4605 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4606#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4607 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4608#endif
4609 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4610 return off;
4611}
4612
4613
4614#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
4615 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
4616
4617#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
4618 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
4619
4620/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
4621DECL_INLINE_THROW(uint32_t)
4622iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4623{
4624 Assert(iGReg < 16);
4625 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4626
4627 /*
4628 * If it's a constant value (unlikely) we treat this as an
4629 * IEM_MC_STORE_GREG_U32_CONST statement.
4630 */
4631 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4632 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4633 { /* likely */ }
4634 else
4635 {
4636 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4637 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4638 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
4639 }
4640
4641 /*
4642 * For the rest we allocate a guest register for the variable and write
4643 * it to the CPUMCTX structure.
4644 */
4645 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4646#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4647 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4648#else
4649 RT_NOREF(idxVarReg);
4650#endif
4651#ifdef VBOX_STRICT
4652 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
4653#endif
4654 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4655 return off;
4656}
4657
4658
4659#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
4660 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
4661
4662/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
4663DECL_INLINE_THROW(uint32_t)
4664iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
4665{
4666 Assert(iGReg < 16);
4667 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4668 kIemNativeGstRegUse_ForFullWrite);
4669 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4670#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4671 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4672#endif
4673 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4674 return off;
4675}
4676
4677
4678#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
4679 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
4680
4681#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
4682 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
4683
4684/** Emits code for IEM_MC_STORE_GREG_U64 and IEM_MC_STORE_GREG_I64. */
4685DECL_INLINE_THROW(uint32_t)
4686iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4687{
4688 Assert(iGReg < 16);
4689 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4690
4691 /*
4692 * If it's a constant value (unlikely) we treat this as an
4693 * IEM_MC_STORE_GREG_U64_CONST statement.
4694 */
4695 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4696 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4697 { /* likely */ }
4698 else
4699 {
4700 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4701 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4702 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
4703 }
4704
4705 /*
4706 * For the rest we allocate a guest register for the variable and write
4707 * it to the CPUMCTX structure.
4708 */
4709 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4710#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4711 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4712#else
4713 RT_NOREF(idxVarReg);
4714#endif
4715 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4716 return off;
4717}
4718
4719
4720#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
4721 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
4722
4723/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
4724DECL_INLINE_THROW(uint32_t)
4725iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
4726{
4727 Assert(iGReg < 16);
4728 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4729 kIemNativeGstRegUse_ForUpdate);
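    /* A 32-bit register-to-self move zero-extends to 64 bits on both AMD64 and ARM64,
       which is exactly the architectural effect of clearing the high half. */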
4730 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
4731#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4732 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4733#endif
4734 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4735 return off;
4736}
4737
4738
4739#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4740#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
4741 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
4742
4743/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
4744DECL_INLINE_THROW(uint32_t)
4745iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
4746{
4747 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4748 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4749 Assert(iGRegLo < 16 && iGRegHi < 16);
4750
4751 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4752 kIemNativeGstRegUse_ForFullWrite);
4753 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4754 kIemNativeGstRegUse_ForFullWrite);
4755
4756 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4757 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
4758 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
4759 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
4760
4761 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4762 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4763 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4764 return off;
4765}
4766#endif
4767
4768
4769/*********************************************************************************************************************************
4770* General purpose register manipulation (add, sub). *
4771*********************************************************************************************************************************/
4772
4773#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
4774 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
4775
4776/** Emits code for IEM_MC_ADD_GREG_U16. */
4777DECL_INLINE_THROW(uint32_t)
4778iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
4779{
4780 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4781 kIemNativeGstRegUse_ForUpdate);
4782
4783#ifdef RT_ARCH_AMD64
4784 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4785 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4786 if (idxGstTmpReg >= 8)
4787 pbCodeBuf[off++] = X86_OP_REX_B;
4788 if (uAddend == 1)
4789 {
4790 pbCodeBuf[off++] = 0xff; /* inc */
4791 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4792 }
4793 else
4794 {
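        /* add r/m16, imm16 (0x81 /0); thanks to the operand-size prefix the immediate
           is only two bytes, hence the single trailing zero byte below. */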
4795 pbCodeBuf[off++] = 0x81;
4796 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4797 pbCodeBuf[off++] = uAddend;
4798 pbCodeBuf[off++] = 0;
4799 }
4800
4801#else
4802 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4803 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4804
4805 /* add tmp, gstgrp, uAddend */
4806 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
4807
4808 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
4809 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4810
4811 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4812#endif
4813
4814 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4815
4816#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4817 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4818#endif
4819
4820 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4821 return off;
4822}
4823
4824
4825#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
4826 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4827
4828#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
4829 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4830
4831/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
4832DECL_INLINE_THROW(uint32_t)
4833iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
4834{
4835 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4836 kIemNativeGstRegUse_ForUpdate);
4837
4838#ifdef RT_ARCH_AMD64
4839 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
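    /* Pick the shortest encoding: inc for an addend of 1, 0x83 /0 with a sign-extended
       imm8 for addends below 128, and 0x81 /0 with a full imm32 otherwise. */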
4840 if (f64Bit)
4841 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4842 else if (idxGstTmpReg >= 8)
4843 pbCodeBuf[off++] = X86_OP_REX_B;
4844 if (uAddend == 1)
4845 {
4846 pbCodeBuf[off++] = 0xff; /* inc */
4847 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4848 }
4849 else if (uAddend < 128)
4850 {
4851 pbCodeBuf[off++] = 0x83; /* add */
4852 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4853 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4854 }
4855 else
4856 {
4857 pbCodeBuf[off++] = 0x81; /* add */
4858 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4859 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4860 pbCodeBuf[off++] = 0;
4861 pbCodeBuf[off++] = 0;
4862 pbCodeBuf[off++] = 0;
4863 }
4864
4865#else
4866 /* add gstgrp, gstgrp, uAddend */
4867 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4868 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
4869
4870#endif
4871
4872 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4873
4874#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4875 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4876#endif
4877
4878 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4879 return off;
4880}
4881
4882
4883
4884#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
4885 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
4886
4887/** Emits code for IEM_MC_SUB_GREG_U16. */
4888DECL_INLINE_THROW(uint32_t)
4889iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
4890{
4891 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4892 kIemNativeGstRegUse_ForUpdate);
4893
4894#ifdef RT_ARCH_AMD64
4895 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4896 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4897 if (idxGstTmpReg >= 8)
4898 pbCodeBuf[off++] = X86_OP_REX_B;
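    /* Shortest encoding: DEC (0xff /1) when subtracting 1, otherwise SUB r/m16,imm16
       (0x81 /5; the operand-size prefix was already emitted above). */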
4899 if (uSubtrahend == 1)
4900 {
4901 pbCodeBuf[off++] = 0xff; /* dec */
4902 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4903 }
4904 else
4905 {
4906 pbCodeBuf[off++] = 0x81;
4907 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4908 pbCodeBuf[off++] = uSubtrahend;
4909 pbCodeBuf[off++] = 0;
4910 }
4911
4912#else
4913 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4914 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4915
4916 /* sub tmp, gstgrp, uSubtrahend */
4917 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
4918
4919 /* bfi gstreg, tmpreg, 0, 16 - moves bits 15:0 from tmpreg into gstreg. */
4920 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4921
4922 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4923#endif
4924
4925 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4926
4927#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4928 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4929#endif
4930
4931 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4932 return off;
4933}
4934
4935
4936#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
4937 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4938
4939#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
4940 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4941
4942/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
4943DECL_INLINE_THROW(uint32_t)
4944iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
4945{
4946 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4947 kIemNativeGstRegUse_ForUpdate);
4948
4949#ifdef RT_ARCH_AMD64
4950 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4951 if (f64Bit)
4952 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4953 else if (idxGstTmpReg >= 8)
4954 pbCodeBuf[off++] = X86_OP_REX_B;
4955 if (uSubtrahend == 1)
4956 {
4957 pbCodeBuf[off++] = 0xff; /* dec */
4958 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4959 }
4960 else if (uSubtrahend < 128)
4961 {
4962 pbCodeBuf[off++] = 0x83; /* sub */
4963 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4964 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4965 }
4966 else
4967 {
4968 pbCodeBuf[off++] = 0x81; /* sub */
4969 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4970 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4971 pbCodeBuf[off++] = 0;
4972 pbCodeBuf[off++] = 0;
4973 pbCodeBuf[off++] = 0;
4974 }
4975
4976#else
4977 /* sub gstgrp, gstgrp, uSubtrahend */
4978 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4979 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
4980
4981#endif
4982
4983 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4984
4985#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4986 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4987#endif
4988
4989 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4990 return off;
4991}
4992
4993
4994#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
4995 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4996
4997#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
4998 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4999
5000#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
5001 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5002
5003#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
5004 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5005
5006/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
5007DECL_INLINE_THROW(uint32_t)
5008iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5009{
5010#ifdef VBOX_STRICT
5011 switch (cbMask)
5012 {
5013 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5014 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5015 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5016 case sizeof(uint64_t): break;
5017 default: AssertFailedBreak();
5018 }
5019#endif
5020
5021 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5022 kIemNativeGstRegUse_ForUpdate);
5023
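    /* For 8 and 16-bit masks, set all bits above the mask width so the AND leaves the
       upper part of the 64-bit register untouched, e.g. a byte mask 0x0f becomes
       an AND with 0xffffffffffffff0f. */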
5024 switch (cbMask)
5025 {
5026 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5027 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
5028 break;
5029 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
5030 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
5031 break;
5032 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5033 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5034 break;
5035 case sizeof(uint64_t):
5036 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
5037 break;
5038 default: AssertFailedBreak();
5039 }
5040
5041 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5042
5043#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5044 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5045#endif
5046
5047 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5048 return off;
5049}
5050
5051
5052#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
5053 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5054
5055#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
5056 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5057
5058#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
5059 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5060
5061#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
5062 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5063
5064/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
5065DECL_INLINE_THROW(uint32_t)
5066iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5067{
5068#ifdef VBOX_STRICT
5069 switch (cbMask)
5070 {
5071 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5072 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5073 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5074 case sizeof(uint64_t): break;
5075 default: AssertFailedBreak();
5076 }
5077#endif
5078
5079 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5080 kIemNativeGstRegUse_ForUpdate);
5081
5082 switch (cbMask)
5083 {
5084 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5085 case sizeof(uint16_t):
5086 case sizeof(uint64_t):
5087 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
5088 break;
5089 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5090 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5091 break;
5092 default: AssertFailedBreak();
5093 }
5094
5095 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5096
5097#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5098 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5099#endif
5100
5101 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5102 return off;
5103}
5104
5105
5106/*********************************************************************************************************************************
5107* Local/Argument variable manipulation (add, sub, and, or). *
5108*********************************************************************************************************************************/
5109
5110#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
5111 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5112
5113#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
5114 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5115
5116#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
5117 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5118
5119#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
5120 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5121
5122
5123#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
5124 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
5125
5126#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
5127 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
5128
5129#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
5130 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
5131
5132/** Emits code for AND'ing a local and a constant value. */
5133DECL_INLINE_THROW(uint32_t)
5134iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5135{
5136#ifdef VBOX_STRICT
5137 switch (cbMask)
5138 {
5139 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5140 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5141 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5142 case sizeof(uint64_t): break;
5143 default: AssertFailedBreak();
5144 }
5145#endif
5146
5147 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5148 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5149
5150 if (cbMask <= sizeof(uint32_t))
5151 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
5152 else
5153 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
5154
5155 iemNativeVarRegisterRelease(pReNative, idxVar);
5156 return off;
5157}
5158
5159
5160#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
5161 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5162
5163#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
5164 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5165
5166#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
5167 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5168
5169#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
5170 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5171
5172/** Emits code for OR'ing a local and a constant value. */
5173DECL_INLINE_THROW(uint32_t)
5174iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5175{
5176#ifdef VBOX_STRICT
5177 switch (cbMask)
5178 {
5179 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5180 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5181 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5182 case sizeof(uint64_t): break;
5183 default: AssertFailedBreak();
5184 }
5185#endif
5186
5187 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5188 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5189
5190 if (cbMask <= sizeof(uint32_t))
5191 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
5192 else
5193 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
5194
5195 iemNativeVarRegisterRelease(pReNative, idxVar);
5196 return off;
5197}
5198
5199
5200#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
5201 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
5202
5203#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
5204 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
5205
5206#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
5207 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
5208
5209/** Emits code for reversing the byte order in a local value. */
5210DECL_INLINE_THROW(uint32_t)
5211iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
5212{
5213 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5214 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5215
5216 switch (cbLocal)
5217 {
5218 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
5219 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
5220 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
5221 default: AssertFailedBreak();
5222 }
5223
5224 iemNativeVarRegisterRelease(pReNative, idxVar);
5225 return off;
5226}
5227
5228
5229#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
5230 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5231
5232#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
5233 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5234
5235#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
5236 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5237
5238/** Emits code for shifting left a local value. */
5239DECL_INLINE_THROW(uint32_t)
5240iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5241{
5242#ifdef VBOX_STRICT
5243 switch (cbLocal)
5244 {
5245 case sizeof(uint8_t): Assert(cShift < 8); break;
5246 case sizeof(uint16_t): Assert(cShift < 16); break;
5247 case sizeof(uint32_t): Assert(cShift < 32); break;
5248 case sizeof(uint64_t): Assert(cShift < 64); break;
5249 default: AssertFailedBreak();
5250 }
5251#endif
5252
5253 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5254 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5255
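    /* Sub-32-bit locals are shifted as 32-bit values, so mask off anything shifted
       past the local's width afterwards. */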
5256 if (cbLocal <= sizeof(uint32_t))
5257 {
5258 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
5259 if (cbLocal < sizeof(uint32_t))
5260 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
5261 cbLocal == sizeof(uint16_t)
5262 ? UINT32_C(0xffff)
5263 : UINT32_C(0xff));
5264 }
5265 else
5266 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
5267
5268 iemNativeVarRegisterRelease(pReNative, idxVar);
5269 return off;
5270}
5271
5272
5273#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
5274 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5275
5276#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
5277 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5278
5279#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
5280 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5281
5282/** Emits code for shifting left a local value. */
5283DECL_INLINE_THROW(uint32_t)
5284iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5285{
5286#ifdef VBOX_STRICT
5287 switch (cbLocal)
5288 {
5289 case sizeof(int8_t): Assert(cShift < 8); break;
5290 case sizeof(int16_t): Assert(cShift < 16); break;
5291 case sizeof(int32_t): Assert(cShift < 32); break;
5292 case sizeof(int64_t): Assert(cShift < 64); break;
5293 default: AssertFailedBreak();
5294 }
5295#endif
5296
5297 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5298 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5299
5300 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
5301 if (cbLocal == sizeof(uint8_t))
5302 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
5303 else if (cbLocal == sizeof(uint16_t))
5304 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
5305
5306 if (cbLocal <= sizeof(uint32_t))
5307 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
5308 else
5309 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
5310
5311 iemNativeVarRegisterRelease(pReNative, idxVar);
5312 return off;
5313}
5314
5315
5316#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
5317 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
5318
5319#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
5320 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
5321
5322#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
5323 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
5324
5325/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
5326DECL_INLINE_THROW(uint32_t)
5327iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
5328{
5329 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
5330 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
5331 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5332 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5333
5334 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5335 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
5336
5337 /* Need to sign extend the value. */
5338 if (cbLocal <= sizeof(uint32_t))
5339 {
5340/** @todo ARM64: In case of boredom, the extended add instruction can do the
5341 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5342 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5343
5344 switch (cbLocal)
5345 {
5346 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5347 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5348 default: AssertFailed();
5349 }
5350
5351 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5352 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5353 }
5354 else
5355 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5356
5357 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5358 iemNativeVarRegisterRelease(pReNative, idxVar);
5359 return off;
5360}
5361
5362
5363
5364/*********************************************************************************************************************************
5365* EFLAGS *
5366*********************************************************************************************************************************/
5367
5368#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5369# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5370#else
5371# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5372 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5373
5374DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5375{
5376 if (fEflOutput)
5377 {
5378 PVMCPUCC const pVCpu = pReNative->pVCpu;
5379# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5380 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5381 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5382 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5383# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5384 if (fEflOutput & (a_fEfl)) \
5385 { \
5386 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5387 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5388 else \
5389 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5390 } else do { } while (0)
5391# else
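    /* Extended layout: classify each written flag as skippable (written but never read or
       otherwise needed), delayable (only potentially needed by an exception/call) or
       required, feeding the corresponding counters below. */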
5392 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5393 IEMLIVENESSBIT const LivenessClobbered =
5394 {
5395 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5396 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5397 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5398 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5399 };
5400 IEMLIVENESSBIT const LivenessDelayable =
5401 {
5402 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5403 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5404 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5405 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5406 };
5407# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5408 if (fEflOutput & (a_fEfl)) \
5409 { \
5410 if (LivenessClobbered.a_fLivenessMember) \
5411 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5412 else if (LivenessDelayable.a_fLivenessMember) \
5413 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5414 else \
5415 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5416 } else do { } while (0)
5417# endif
5418 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5419 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5420 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5421 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5422 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5423 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5424 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5425# undef CHECK_FLAG_AND_UPDATE_STATS
5426 }
5427 RT_NOREF(fEflInput);
5428}
5429 #endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5430
5431#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5432#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5433 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
5434
5435/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5436DECL_INLINE_THROW(uint32_t)
5437iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
5438 uint32_t fEflInput, uint32_t fEflOutput)
5439{
5440 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5441 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5442 RT_NOREF(fEflInput, fEflOutput);
5443
5444#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5445# ifdef VBOX_STRICT
5446 if ( pReNative->idxCurCall != 0
5447 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
5448 {
5449 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5450 uint32_t const fBoth = fEflInput | fEflOutput;
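        /* Check that the liveness info of the previous call matches the declared usage:
           output-only flags should be expected clobbered, input-only flags expected as
           input, and in/out flags expected as modified. */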
5451# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5452 AssertMsg( !(fBoth & (a_fElfConst)) \
5453 || (!(fEflInput & (a_fElfConst)) \
5454 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5455 : !(fEflOutput & (a_fElfConst)) \
5456 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5457 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5458 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5459 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5460 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5461 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5462 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5463 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5464 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5465 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5466# undef ASSERT_ONE_EFL
5467 }
5468# endif
5469#endif
5470
5471 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5472
5473 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
5474 * the existing shadow copy. */
5475 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
5476 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5477 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5478 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5479 return off;
5480}
5481
5482
5483
5484/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5485 * start using it with custom native code emission (inlining assembly
5486 * instruction helpers). */
5487#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5488#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5489 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5490 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
5491
5492#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5493#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5494 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5495 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
5496
5497/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5498DECL_INLINE_THROW(uint32_t)
5499iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
5500 bool fUpdateSkipping)
5501{
5502 RT_NOREF(fEflOutput);
5503 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
5504 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5505
5506#ifdef VBOX_STRICT
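    /* Sanity check the value being committed: the always-one EFLAGS bit (bit 1) must be
       set and the reserved must-be-zero bits must be clear, otherwise hit a breakpoint
       (0x2001 / 0x2002). */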
5507 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5508 uint32_t offFixup = off;
5509 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5510 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5511 iemNativeFixupFixedJump(pReNative, offFixup, off);
5512
5513 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5514 offFixup = off;
5515 off = iemNativeEmitJzToFixed(pReNative, off, off);
5516 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
5517 iemNativeFixupFixedJump(pReNative, offFixup, off);
5518
5519 /** @todo validate that only bits in the fEflOutput mask changed. */
5520#endif
5521
5522#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5523 if (fUpdateSkipping)
5524 {
5525 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5526 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5527 else
5528 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5529 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5530 }
5531#else
5532 RT_NOREF_PV(fUpdateSkipping);
5533#endif
5534
5535 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5536 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
5537 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5538 return off;
5539}
5540
5541
5542typedef enum IEMNATIVEMITEFLOP
5543{
5544 kIemNativeEmitEflOp_Invalid = 0,
5545 kIemNativeEmitEflOp_Set,
5546 kIemNativeEmitEflOp_Clear,
5547 kIemNativeEmitEflOp_Flip
5548} IEMNATIVEMITEFLOP;
5549
5550#define IEM_MC_SET_EFL_BIT(a_fBit) \
5551 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Set);
5552
5553#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
5554 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Clear);
5555
5556#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
5557 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Flip);
5558
5559/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
5560DECL_INLINE_THROW(uint32_t)
5561iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
5562{
5563 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5564 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/);
5565
5566 switch (enmOp)
5567 {
5568 case kIemNativeEmitEflOp_Set:
5569 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5570 break;
5571 case kIemNativeEmitEflOp_Clear:
5572 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
5573 break;
5574 case kIemNativeEmitEflOp_Flip:
5575 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5576 break;
5577 default:
5578 AssertFailed();
5579 break;
5580 }
5581
5582 /** @todo No delayed writeback for EFLAGS right now. */
5583 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5584
5585 /* Free but don't flush the EFLAGS register. */
5586 iemNativeRegFreeTmp(pReNative, idxEflReg);
5587
5588 return off;
5589}
5590
5591
5592/*********************************************************************************************************************************
5593* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
5594*********************************************************************************************************************************/
5595
5596#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
5597 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
5598
5599#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
5600 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
5601
5602#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
5603 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
5604
5605
5606/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
5607 * IEM_MC_FETCH_SREG_ZX_U64. */
5608DECL_INLINE_THROW(uint32_t)
5609iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
5610{
5611 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5612 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
5613 Assert(iSReg < X86_SREG_COUNT);
5614
5615 /*
5616 * For now, we will not create a shadow copy of a selector. The rationale
5617 * is that since we do not recompile the popping and loading of segment
5618 * registers and that the IEM_MC_FETCH_SREG_U* MCs are only used for
5619 * pushing and moving to registers, there is only a small chance that the
5620 * shadow copy will be accessed again before the register is reloaded. One
5621 * scenario would be nested calls in 16-bit code, but I doubt it's worth
5622 * the extra register pressure atm.
5623 *
5624 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
5625 * and iemNativeVarRegisterAcquire for a load scenario. We only got the
5626 * store scenario covered at present (r160730).
5627 */
5628 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5629 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
5630 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
5631 iemNativeVarRegisterRelease(pReNative, idxDstVar);
5632 return off;
5633}
5634
5635
5636
5637/*********************************************************************************************************************************
5638* Register references. *
5639*********************************************************************************************************************************/
5640
5641#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
5642 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
5643
5644#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
5645 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
5646
5647/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
5648DECL_INLINE_THROW(uint32_t)
5649iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
5650{
5651 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
5652 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5653 Assert(iGRegEx < 20);
5654
5655 if (iGRegEx < 16)
5656 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5657 else
5658 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
5659
5660 /* If we've delayed writing back the register value, flush it now. */
5661 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5662
5663 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5664 if (!fConst)
5665 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
5666
5667 return off;
5668}
5669
5670#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
5671 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
5672
5673#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
5674 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
5675
5676#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
5677 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
5678
5679#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
5680 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
5681
5682#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
5683 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
5684
5685#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
5686 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
5687
5688#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
5689 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
5690
5691#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
5692 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
5693
5694#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
5695 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
5696
5697#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
5698 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
5699
5700/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
5701DECL_INLINE_THROW(uint32_t)
5702iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
5703{
5704 Assert(iGReg < 16);
5705 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
5706 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5707
5708 /* If we've delayed writing back the register value, flush it now. */
5709 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
5710
5711 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5712 if (!fConst)
5713 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
5714
5715 return off;
5716}
5717
5718
5719#undef IEM_MC_REF_EFLAGS /* should not be used. */
5720#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
5721 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5722 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
5723
5724/** Handles IEM_MC_REF_EFLAGS. */
5725DECL_INLINE_THROW(uint32_t)
5726iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
5727{
5728 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
5729 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5730
5731#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5732 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5733
5734 /* Updating the skipping according to the outputs is a little early, but
5735 we don't have any other hooks for references atm. */
5736 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5737 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5738 else if (fEflOutput & X86_EFL_STATUS_BITS)
5739 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5740 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5741#else
5742 RT_NOREF(fEflInput, fEflOutput);
5743#endif
5744
5745 /* If we've delayed writing back the register value, flush it now. */
5746 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
5747
5748 /* If there is a shadow copy of guest EFLAGS, flush it now. */
5749 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
5750
5751 return off;
5752}
5753
5754
5755/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
5756 * different code from the threaded recompiler, maybe it would be helpful. For now
5757 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
5758#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
5759
5760
5761#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
5762 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
5763
5764#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
5765 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
5766
5767#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
5768 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
5769
5770#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
5771 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
5772
5773#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5774/* Just being paranoid here. */
5775# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
5776AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
5777AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
5778AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
5779AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
5780# endif
5781AssertCompileMemberOffset(X86XMMREG, au64, 0);
5782AssertCompileMemberOffset(X86XMMREG, au32, 0);
5783AssertCompileMemberOffset(X86XMMREG, ar64, 0);
5784AssertCompileMemberOffset(X86XMMREG, ar32, 0);
5785
5786# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
5787 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
5788# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
5789 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
5790# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
5791 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
5792# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
5793 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
5794#endif
5795
5796/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
5797DECL_INLINE_THROW(uint32_t)
5798iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
5799{
5800 Assert(iXReg < 16);
5801 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
5802 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5803
5804 /* If we've delayed writing back the register value, flush it now. */
5805 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
5806
5807#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5808 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5809 if (!fConst)
5810 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
5811#else
5812 RT_NOREF(fConst);
5813#endif
5814
5815 return off;
5816}
5817
5818
5819
5820/*********************************************************************************************************************************
5821* Effective Address Calculation *
5822*********************************************************************************************************************************/
5823#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
5824 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
5825
5826/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
5827 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
5828DECL_INLINE_THROW(uint32_t)
5829iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5830 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
5831{
5832 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5833
5834 /*
5835 * Handle the disp16 form with no registers first.
5836 *
5837 * Convert to an immediate value, as that'll delay the register allocation
5838 * and assignment till the memory access / call / whatever and we can use
5839 * a more appropriate register (or none at all).
5840 */
5841 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
5842 {
5843 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
5844 return off;
5845 }
5846
5847 /* Determine the displacement. */
5848 uint16_t u16EffAddr;
5849 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5850 {
5851 case 0: u16EffAddr = 0; break;
5852 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
5853 case 2: u16EffAddr = u16Disp; break;
5854 default: AssertFailedStmt(u16EffAddr = 0);
5855 }
5856
5857 /* Determine the registers involved. */
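    /* Classic 16-bit ModR/M table: 0=BX+SI, 1=BX+DI, 2=BP+SI, 3=BP+DI, 4=SI, 5=DI,
       6=BP (the mod=0 disp16-only case was handled above), 7=BX. */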
5858 uint8_t idxGstRegBase;
5859 uint8_t idxGstRegIndex;
5860 switch (bRm & X86_MODRM_RM_MASK)
5861 {
5862 case 0:
5863 idxGstRegBase = X86_GREG_xBX;
5864 idxGstRegIndex = X86_GREG_xSI;
5865 break;
5866 case 1:
5867 idxGstRegBase = X86_GREG_xBX;
5868 idxGstRegIndex = X86_GREG_xDI;
5869 break;
5870 case 2:
5871 idxGstRegBase = X86_GREG_xBP;
5872 idxGstRegIndex = X86_GREG_xSI;
5873 break;
5874 case 3:
5875 idxGstRegBase = X86_GREG_xBP;
5876 idxGstRegIndex = X86_GREG_xDI;
5877 break;
5878 case 4:
5879 idxGstRegBase = X86_GREG_xSI;
5880 idxGstRegIndex = UINT8_MAX;
5881 break;
5882 case 5:
5883 idxGstRegBase = X86_GREG_xDI;
5884 idxGstRegIndex = UINT8_MAX;
5885 break;
5886 case 6:
5887 idxGstRegBase = X86_GREG_xBP;
5888 idxGstRegIndex = UINT8_MAX;
5889 break;
5890#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
5891 default:
5892#endif
5893 case 7:
5894 idxGstRegBase = X86_GREG_xBX;
5895 idxGstRegIndex = UINT8_MAX;
5896 break;
5897 }
5898
5899 /*
5900 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
5901 */
5902 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5903 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5904 kIemNativeGstRegUse_ReadOnly);
5905 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
5906 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5907 kIemNativeGstRegUse_ReadOnly)
5908 : UINT8_MAX;
5909#ifdef RT_ARCH_AMD64
5910 if (idxRegIndex == UINT8_MAX)
5911 {
5912 if (u16EffAddr == 0)
5913 {
5914 /* movzx ret, base */
5915 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
5916 }
5917 else
5918 {
5919 /* lea ret32, [base64 + disp32] */
5920 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5921 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5922 if (idxRegRet >= 8 || idxRegBase >= 8)
5923 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5924 pbCodeBuf[off++] = 0x8d;
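            /* r12 shares the r/m encoding with rSP, which is the SIB escape, so it needs
               an explicit SIB byte with no index. */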
5925 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5926 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
5927 else
5928 {
5929 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
5930 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5931 }
5932 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5933 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5934 pbCodeBuf[off++] = 0;
5935 pbCodeBuf[off++] = 0;
5936 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5937
5938 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5939 }
5940 }
5941 else
5942 {
5943 /* lea ret32, [index64 + base64 (+ disp32)] */
5944 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5945 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5946 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5947 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5948 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5949 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5950 pbCodeBuf[off++] = 0x8d;
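        /* mod=0 is only usable with a zero displacement and a base other than rBP/r13,
           since SIB base=101 with mod=0 means disp32 without a base. */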
5951 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
5952 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5953 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
5954 if (bMod == X86_MOD_MEM4)
5955 {
5956 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5957 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5958 pbCodeBuf[off++] = 0;
5959 pbCodeBuf[off++] = 0;
5960 }
5961 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5962 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5963 }
5964
5965#elif defined(RT_ARCH_ARM64)
5966 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5967 if (u16EffAddr == 0)
5968 {
5969 if (idxRegIndex == UINT8_MAX)
5970 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
5971 else
5972 {
5973 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
5974 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5975 }
5976 }
5977 else
5978 {
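        /* The A64 ADD/SUB immediate forms only encode a 12-bit unsigned value, so use them
           when the displacement fits (as-is or negated) and otherwise materialize the
           constant via MOVZ first. */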
5979 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
5980 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
5981 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
5982 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5983 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
5984 else
5985 {
5986 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
5987 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5988 }
5989 if (idxRegIndex != UINT8_MAX)
5990 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
5991 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5992 }
5993
5994#else
5995# error "port me"
5996#endif
5997
5998 if (idxRegIndex != UINT8_MAX)
5999 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6000 iemNativeRegFreeTmp(pReNative, idxRegBase);
6001 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6002 return off;
6003}
6004
6005
6006#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
6007 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
6008
6009/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
6010 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
6011DECL_INLINE_THROW(uint32_t)
6012iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6013 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
6014{
6015 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6016
6017 /*
6018 * Handle the disp32 form with no registers first.
6019 *
6020 * Convert to an immediate value, as that'll delay the register allocation
6021 * and assignment till the memory access / call / whatever and we can use
6022 * a more appropriate register (or none at all).
6023 */
6024 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6025 {
6026 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
6027 return off;
6028 }
6029
6030 /* Calculate the fixed displacement (more on this further down for SIB.B=4 and SIB.B=5). */
6031 uint32_t u32EffAddr = 0;
6032 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6033 {
6034 case 0: break;
6035 case 1: u32EffAddr = (int8_t)u32Disp; break;
6036 case 2: u32EffAddr = u32Disp; break;
6037 default: AssertFailed();
6038 }
6039
6040 /* Get the register (or SIB) value. */
6041 uint8_t idxGstRegBase = UINT8_MAX;
6042 uint8_t idxGstRegIndex = UINT8_MAX;
6043 uint8_t cShiftIndex = 0;
6044 switch (bRm & X86_MODRM_RM_MASK)
6045 {
6046 case 0: idxGstRegBase = X86_GREG_xAX; break;
6047 case 1: idxGstRegBase = X86_GREG_xCX; break;
6048 case 2: idxGstRegBase = X86_GREG_xDX; break;
6049 case 3: idxGstRegBase = X86_GREG_xBX; break;
6050 case 4: /* SIB */
6051 {
6052 /* index w/ scaling. */
6053 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6054 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6055 {
6056 case 0: idxGstRegIndex = X86_GREG_xAX; break;
6057 case 1: idxGstRegIndex = X86_GREG_xCX; break;
6058 case 2: idxGstRegIndex = X86_GREG_xDX; break;
6059 case 3: idxGstRegIndex = X86_GREG_xBX; break;
6060 case 4: cShiftIndex = 0; /*no index*/ break;
6061 case 5: idxGstRegIndex = X86_GREG_xBP; break;
6062 case 6: idxGstRegIndex = X86_GREG_xSI; break;
6063 case 7: idxGstRegIndex = X86_GREG_xDI; break;
6064 }
6065
6066 /* base */
6067 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
6068 {
6069 case 0: idxGstRegBase = X86_GREG_xAX; break;
6070 case 1: idxGstRegBase = X86_GREG_xCX; break;
6071 case 2: idxGstRegBase = X86_GREG_xDX; break;
6072 case 3: idxGstRegBase = X86_GREG_xBX; break;
6073 case 4:
6074 idxGstRegBase = X86_GREG_xSP;
6075 u32EffAddr += uSibAndRspOffset >> 8;
6076 break;
6077 case 5:
6078 if ((bRm & X86_MODRM_MOD_MASK) != 0)
6079 idxGstRegBase = X86_GREG_xBP;
6080 else
6081 {
6082 Assert(u32EffAddr == 0);
6083 u32EffAddr = u32Disp;
6084 }
6085 break;
6086 case 6: idxGstRegBase = X86_GREG_xSI; break;
6087 case 7: idxGstRegBase = X86_GREG_xDI; break;
6088 }
6089 break;
6090 }
6091 case 5: idxGstRegBase = X86_GREG_xBP; break;
6092 case 6: idxGstRegBase = X86_GREG_xSI; break;
6093 case 7: idxGstRegBase = X86_GREG_xDI; break;
6094 }
6095
6096 /*
6097 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6098 * the start of the function.
6099 */
6100 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6101 {
6102 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
6103 return off;
6104 }
6105
6106 /*
6107 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6108 */
6109 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6110 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6111 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6112 kIemNativeGstRegUse_ReadOnly);
6113 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6114 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6115 kIemNativeGstRegUse_ReadOnly);
6116
6117 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6118 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6119 {
6120 idxRegBase = idxRegIndex;
6121 idxRegIndex = UINT8_MAX;
6122 }
6123
6124#ifdef RT_ARCH_AMD64
6125 if (idxRegIndex == UINT8_MAX)
6126 {
6127 if (u32EffAddr == 0)
6128 {
6129 /* mov ret, base */
6130 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6131 }
6132 else
6133 {
6134 /* lea ret32, [base64 + disp32] */
6135 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6136 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6137 if (idxRegRet >= 8 || idxRegBase >= 8)
6138 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6139 pbCodeBuf[off++] = 0x8d;
6140 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6141 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6142 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6143 else
6144 {
6145 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6146 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6147 }
6148 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6149 if (bMod == X86_MOD_MEM4)
6150 {
6151 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6152 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6153 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6154 }
6155 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6156 }
6157 }
6158 else
6159 {
6160 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6161 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6162 if (idxRegBase == UINT8_MAX)
6163 {
6164 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
6165 if (idxRegRet >= 8 || idxRegIndex >= 8)
6166 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6167 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6168 pbCodeBuf[off++] = 0x8d;
6169 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6170 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6171 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6172 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6173 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6174 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6175 }
6176 else
6177 {
6178 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6179 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6180 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6181 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6182 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6183 pbCodeBuf[off++] = 0x8d;
6184 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6185 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6186 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6187 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6188 if (bMod != X86_MOD_MEM0)
6189 {
6190 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6191 if (bMod == X86_MOD_MEM4)
6192 {
6193 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6194 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6195 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6196 }
6197 }
6198 }
6199 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6200 }
6201
6202#elif defined(RT_ARCH_ARM64)
6203 if (u32EffAddr == 0)
6204 {
6205 if (idxRegIndex == UINT8_MAX)
6206 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6207 else if (idxRegBase == UINT8_MAX)
6208 {
6209 if (cShiftIndex == 0)
6210 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
6211 else
6212 {
6213 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6214 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
6215 }
6216 }
6217 else
6218 {
6219 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6220 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6221 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6222 }
6223 }
6224 else
6225 {
6226 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
6227 {
6228 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6229 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
6230 }
6231 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
6232 {
6233 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6234 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6235 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
6236 }
6237 else
6238 {
6239 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
6240 if (idxRegBase != UINT8_MAX)
6241 {
6242 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6243 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6244 }
6245 }
6246 if (idxRegIndex != UINT8_MAX)
6247 {
6248 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6249 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6250 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6251 }
6252 }
6253
6254#else
6255# error "port me"
6256#endif
6257
6258 if (idxRegIndex != UINT8_MAX)
6259 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6260 if (idxRegBase != UINT8_MAX)
6261 iemNativeRegFreeTmp(pReNative, idxRegBase);
6262 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6263 return off;
6264}
6265
6266
6267#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6268 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6269 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6270
6271#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6272 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6273 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
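/* Note: the FS/GS variant maps onto the same emitter because the segment base is applied when the
   memory is actually accessed, not when the effective address is calculated. */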
6274
6275#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6276 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6277 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
6278
6279/**
6280 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
6281 *
6282 * @returns New off.
6283 * @param   pReNative           The native recompile state.
6284 * @param   off                 The current code buffer offset.
6285 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
6286 * bit 4 to REX.X. The two bits are part of the
6287 * REG sub-field, which isn't needed in this
6288 * function.
6289 * @param uSibAndRspOffset Two parts:
6290 * - The first 8 bits make up the SIB byte.
6291 * - The next 8 bits are the fixed RSP/ESP offset
6292 * in case of a pop [xSP].
6293 * @param u32Disp The displacement byte/word/dword, if any.
6294 * @param cbInstr The size of the fully decoded instruction. Used
6295 * for RIP relative addressing.
6296 * @param idxVarRet The result variable number.
6297 * @param f64Bit Whether to use a 64-bit or 32-bit address size
6298 * when calculating the address.
6299 *
6300 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
6301 */
6302DECL_INLINE_THROW(uint32_t)
6303iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
6304 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
6305{
6306 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6307
6308 /*
6309 * Special case the rip + disp32 form first.
6310 */
6311 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6312 {
6313 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6314 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6315 kIemNativeGstRegUse_ReadOnly);
6316 if (f64Bit)
6317 {
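            /* With delayed PC updating the PC register still lags behind by the accumulated
               offPc delta, so that delta has to be folded into the displacement here. */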
6318#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6319 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr + (int64_t)pReNative->Core.offPc;
6320#else
6321 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
6322#endif
6323#ifdef RT_ARCH_AMD64
6324 if ((int32_t)offFinalDisp == offFinalDisp)
6325 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
6326 else
6327 {
6328 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
6329 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
6330 }
6331#else
6332 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6333#endif
6334 }
6335 else
6336 {
6337# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6338 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr + (int32_t)pReNative->Core.offPc;
6339# else
6340 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr;
6341# endif
6342 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6343 }
6344 iemNativeRegFreeTmp(pReNative, idxRegPc);
6345 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6346 return off;
6347 }
6348
6349 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
6350 int64_t i64EffAddr = 0;
6351 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6352 {
6353 case 0: break;
6354 case 1: i64EffAddr = (int8_t)u32Disp; break;
6355 case 2: i64EffAddr = (int32_t)u32Disp; break;
6356 default: AssertFailed();
6357 }
6358
6359 /* Get the register (or SIB) value. */
6360 uint8_t idxGstRegBase = UINT8_MAX;
6361 uint8_t idxGstRegIndex = UINT8_MAX;
6362 uint8_t cShiftIndex = 0;
6363 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6364 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6365 else /* SIB: */
6366 {
6367        /* index w/ scaling. */
6368 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6369 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6370 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6371 if (idxGstRegIndex == 4)
6372 {
6373 /* no index */
6374 cShiftIndex = 0;
6375 idxGstRegIndex = UINT8_MAX;
6376 }
6377
6378 /* base */
6379 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6380 if (idxGstRegBase == 4)
6381 {
6382 /* pop [rsp] hack */
6383 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6384 }
6385 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6386 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6387 {
6388 /* mod=0 and base=5 -> disp32, no base reg. */
6389 Assert(i64EffAddr == 0);
6390 i64EffAddr = (int32_t)u32Disp;
6391 idxGstRegBase = UINT8_MAX;
6392 }
6393 }
6394
6395 /*
6396 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6397 * the start of the function.
6398 */
6399 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6400 {
6401 if (f64Bit)
6402 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6403 else
6404 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6405 return off;
6406 }
6407
6408 /*
6409 * Now emit code that calculates:
6410 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6411 * or if !f64Bit:
6412 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6413 */
6414 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6415 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6416 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6417 kIemNativeGstRegUse_ReadOnly);
6418 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6419 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6420 kIemNativeGstRegUse_ReadOnly);
6421
6422 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6423 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6424 {
6425 idxRegBase = idxRegIndex;
6426 idxRegIndex = UINT8_MAX;
6427 }
6428
6429#ifdef RT_ARCH_AMD64
6430 uint8_t bFinalAdj;
6431 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6432 bFinalAdj = 0; /* likely */
6433 else
6434 {
6435 /* pop [rsp] with a problematic disp32 value. Split out the
6436 RSP offset and add it separately afterwards (bFinalAdj). */
6437 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6438 Assert(idxGstRegBase == X86_GREG_xSP);
6439 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6440 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6441 Assert(bFinalAdj != 0);
6442 i64EffAddr -= bFinalAdj;
6443 Assert((int32_t)i64EffAddr == i64EffAddr);
6444 }
6445 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6446//pReNative->pInstrBuf[off++] = 0xcc;
6447
6448 if (idxRegIndex == UINT8_MAX)
6449 {
6450 if (u32EffAddr == 0)
6451 {
6452 /* mov ret, base */
6453 if (f64Bit)
6454 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6455 else
6456 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6457 }
6458 else
6459 {
6460 /* lea ret, [base + disp32] */
6461 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6462 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6463 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6464 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6465 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6466 | (f64Bit ? X86_OP_REX_W : 0);
6467 pbCodeBuf[off++] = 0x8d;
6468 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
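            /* rm=100 in the ModRM byte selects a SIB byte, so RSP cannot be encoded directly as a
               base (asserted above) and R12 needs an explicit SIB byte with no index. */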
6469 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6470 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6471 else
6472 {
6473 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6474 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6475 }
6476 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6477 if (bMod == X86_MOD_MEM4)
6478 {
6479 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6480 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6481 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6482 }
6483 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6484 }
6485 }
6486 else
6487 {
6488 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6489 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6490 if (idxRegBase == UINT8_MAX)
6491 {
6492 /* lea ret, [(index64 << cShiftIndex) + disp32] */
6493 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
6494 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6495 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6496 | (f64Bit ? X86_OP_REX_W : 0);
6497 pbCodeBuf[off++] = 0x8d;
6498 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6499 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6500 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6501 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6502 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6503 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6504 }
6505 else
6506 {
6507 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6508 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6509 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6510 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6511 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6512 | (f64Bit ? X86_OP_REX_W : 0);
6513 pbCodeBuf[off++] = 0x8d;
6514 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6515 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6516 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6517 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6518 if (bMod != X86_MOD_MEM0)
6519 {
6520 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6521 if (bMod == X86_MOD_MEM4)
6522 {
6523 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6524 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6525 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6526 }
6527 }
6528 }
6529 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6530 }
6531
6532 if (!bFinalAdj)
6533 { /* likely */ }
6534 else
6535 {
6536 Assert(f64Bit);
6537 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
6538 }
6539
6540#elif defined(RT_ARCH_ARM64)
6541 if (i64EffAddr == 0)
6542 {
6543 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6544 if (idxRegIndex == UINT8_MAX)
6545 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
6546 else if (idxRegBase != UINT8_MAX)
6547 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6548 f64Bit, false /*fSetFlags*/, cShiftIndex);
6549 else
6550 {
6551 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
6552 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
6553 }
6554 }
6555 else
6556 {
6557 if (f64Bit)
6558 { /* likely */ }
6559 else
6560 i64EffAddr = (int32_t)i64EffAddr;
6561
6562 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
6563 {
6564 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6565 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
6566 }
6567 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
6568 {
6569 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6570 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
6571 }
6572 else
6573 {
6574 if (f64Bit)
6575 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
6576 else
6577 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
6578 if (idxRegBase != UINT8_MAX)
6579 {
6580 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6581 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
6582 }
6583 }
6584 if (idxRegIndex != UINT8_MAX)
6585 {
6586 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6587 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6588 f64Bit, false /*fSetFlags*/, cShiftIndex);
6589 }
6590 }
6591
6592#else
6593# error "port me"
6594#endif
6595
6596 if (idxRegIndex != UINT8_MAX)
6597 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6598 if (idxRegBase != UINT8_MAX)
6599 iemNativeRegFreeTmp(pReNative, idxRegBase);
6600 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6601 return off;
6602}
6603
6604
6605/*********************************************************************************************************************************
6606* Memory fetches and stores common *
6607*********************************************************************************************************************************/
6608
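/** The kind of memory operation to emit: a plain store, or a fetch with the given
 *  zero- or sign-extension applied to the loaded value. */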
6609typedef enum IEMNATIVEMITMEMOP
6610{
6611 kIemNativeEmitMemOp_Store = 0,
6612 kIemNativeEmitMemOp_Fetch,
6613 kIemNativeEmitMemOp_Fetch_Zx_U16,
6614 kIemNativeEmitMemOp_Fetch_Zx_U32,
6615 kIemNativeEmitMemOp_Fetch_Zx_U64,
6616 kIemNativeEmitMemOp_Fetch_Sx_U16,
6617 kIemNativeEmitMemOp_Fetch_Sx_U32,
6618 kIemNativeEmitMemOp_Fetch_Sx_U64
6619} IEMNATIVEMITMEMOP;
6620
6621/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
6622 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
6623 * (with iSegReg = UINT8_MAX). */
6624DECL_INLINE_THROW(uint32_t)
6625iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
6626 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAlignMaskAndCtl, IEMNATIVEMITMEMOP enmOp,
6627 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
6628{
6629 /*
6630 * Assert sanity.
6631 */
6632 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
6633 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
6634 Assert( enmOp != kIemNativeEmitMemOp_Store
6635 || pVarValue->enmKind == kIemNativeVarKind_Immediate
6636 || pVarValue->enmKind == kIemNativeVarKind_Stack);
6637 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
6638 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
6639 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
6640 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
6641 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6642 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
6643#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6644 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
6645 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
6646#else
6647 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
6648#endif
6649 Assert(!(fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
6650 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6651#ifdef VBOX_STRICT
6652 if (iSegReg == UINT8_MAX)
6653 {
6654 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6655 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6656 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6657 switch (cbMem)
6658 {
6659 case 1:
6660 Assert( pfnFunction
6661 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
6662 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6663 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6664 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6665 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6666 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
6667 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
6668 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
6669 : UINT64_C(0xc000b000a0009000) ));
6670 Assert(!fAlignMaskAndCtl);
6671 break;
6672 case 2:
6673 Assert( pfnFunction
6674 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
6675 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6676 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6677 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6678 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
6679 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
6680 : UINT64_C(0xc000b000a0009000) ));
6681 Assert(fAlignMaskAndCtl <= 1);
6682 break;
6683 case 4:
6684 Assert( pfnFunction
6685 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
6686 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6687 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6688 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
6689 : UINT64_C(0xc000b000a0009000) ));
6690 Assert(fAlignMaskAndCtl <= 3);
6691 break;
6692 case 8:
6693 Assert( pfnFunction
6694 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
6695 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
6696 : UINT64_C(0xc000b000a0009000) ));
6697 Assert(fAlignMaskAndCtl <= 7);
6698 break;
6699#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6700 case sizeof(RTUINT128U):
6701 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6702 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
6703 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6704 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
6705 || ( enmOp == kIemNativeEmitMemOp_Store
6706 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6707 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
6708 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6709 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6710 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
6711 : fAlignMaskAndCtl <= 15);
6712 break;
6713 case sizeof(RTUINT256U):
6714 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6715 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
6716 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
6717 || ( enmOp == kIemNativeEmitMemOp_Store
6718 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
6719 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
6720 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
6721 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
6722 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
6723 : fAlignMaskAndCtl <= 31);
6724 break;
6725#endif
6726 }
6727 }
6728 else
6729 {
6730 Assert(iSegReg < 6);
6731 switch (cbMem)
6732 {
6733 case 1:
6734 Assert( pfnFunction
6735 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
6736 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
6737 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6738 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6739 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6740 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
6741 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
6742 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
6743 : UINT64_C(0xc000b000a0009000) ));
6744 Assert(!fAlignMaskAndCtl);
6745 break;
6746 case 2:
6747 Assert( pfnFunction
6748 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
6749 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
6750 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6751 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6752 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
6753 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
6754 : UINT64_C(0xc000b000a0009000) ));
6755 Assert(fAlignMaskAndCtl <= 1);
6756 break;
6757 case 4:
6758 Assert( pfnFunction
6759 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
6760 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
6761 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
6762 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
6763 : UINT64_C(0xc000b000a0009000) ));
6764 Assert(fAlignMaskAndCtl <= 3);
6765 break;
6766 case 8:
6767 Assert( pfnFunction
6768 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
6769 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
6770 : UINT64_C(0xc000b000a0009000) ));
6771 Assert(fAlignMaskAndCtl <= 7);
6772 break;
6773#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6774 case sizeof(RTUINT128U):
6775 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6776 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
6777 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6778 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
6779 || ( enmOp == kIemNativeEmitMemOp_Store
6780 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6781 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
6782 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6783 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6784 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
6785 : fAlignMaskAndCtl <= 15);
6786 break;
6787 case sizeof(RTUINT256U):
6788 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6789 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
6790 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
6791 || ( enmOp == kIemNativeEmitMemOp_Store
6792 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
6793 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
6794 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
6795 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
6796 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
6797 : fAlignMaskAndCtl <= 31);
6798 break;
6799#endif
6800 }
6801 }
6802#endif
6803
6804#ifdef VBOX_STRICT
6805 /*
6806 * Check that the fExec flags we've got make sense.
6807 */
6808 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6809#endif
6810
6811 /*
6812 * To keep things simple we have to commit any pending writes first as we
6813 * may end up making calls.
6814 */
6815 /** @todo we could postpone this till we make the call and reload the
6816 * registers after returning from the call. Not sure if that's sensible or
6817 * not, though. */
6818#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6819 off = iemNativeRegFlushPendingWrites(pReNative, off);
6820#else
6821 /* The program counter is treated differently for now. */
6822 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
6823#endif
6824
6825#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6826 /*
6827 * Move/spill/flush stuff out of call-volatile registers.
6828 * This is the easy way out. We could contain this to the tlb-miss branch
6829 * by saving and restoring active stuff here.
6830 */
6831 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
6832#endif
6833
6834 /*
6835 * Define labels and allocate the result register (trying for the return
6836 * register if we can).
6837 */
6838 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6839#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6840 uint8_t idxRegValueFetch = UINT8_MAX;
6841
6842 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6843 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6844 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
6845 else
6846 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6847 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6848 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6849 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6850#else
6851 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6852 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6853 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6854 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6855#endif
6856 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
6857
6858#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6859 uint8_t idxRegValueStore = UINT8_MAX;
6860
6861 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6862 idxRegValueStore = !TlbState.fSkip
6863 && enmOp == kIemNativeEmitMemOp_Store
6864 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6865 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6866 : UINT8_MAX;
6867 else
6868 idxRegValueStore = !TlbState.fSkip
6869 && enmOp == kIemNativeEmitMemOp_Store
6870 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6871 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6872 : UINT8_MAX;
6873
6874#else
6875 uint8_t const idxRegValueStore = !TlbState.fSkip
6876 && enmOp == kIemNativeEmitMemOp_Store
6877 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6878 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6879 : UINT8_MAX;
6880#endif
6881 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
6882 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6883 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6884 : UINT32_MAX;
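    /* Emitted code layout: jump to the TlbLookup code (emitted after the TlbMiss path below);
       on a TLB miss the lookup jumps back to the TlbMiss label, which calls the helper and then
       jumps to TlbDone, while a TLB hit falls through into the inline store/fetch code that
       ends at the TlbDone label. */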
6885
6886 /*
6887 * Jump to the TLB lookup code.
6888 */
6889 if (!TlbState.fSkip)
6890 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6891
6892 /*
6893 * TlbMiss:
6894 *
6895 * Call helper to do the fetching.
6896 * We flush all guest register shadow copies here.
6897 */
6898 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
6899
6900#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6901 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6902#else
6903 RT_NOREF(idxInstr);
6904#endif
6905
6906#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6907 if (pReNative->Core.offPc)
6908 {
6909 /*
6910 * Update the program counter but restore it at the end of the TlbMiss branch.
6911 * This should allow delaying more program counter updates for the TlbLookup and hit paths
6912 * which are hopefully much more frequent, reducing the amount of memory accesses.
6913 */
6914 /* Allocate a temporary PC register. */
6915/** @todo r=bird: This would technically need to be done up front as it's a register allocation. */
6916 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6917 kIemNativeGstRegUse_ForUpdate);
6918
6919 /* Perform the addition and store the result. */
6920 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6921 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6922# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6923 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
6924# endif
6925
6926 /* Free and flush the PC register. */
6927 iemNativeRegFreeTmp(pReNative, idxPcReg);
6928 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6929 }
6930#endif
6931
6932#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6933 /* Save variables in volatile registers. */
6934 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6935 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
6936 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
6937 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6938#endif
6939
6940 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
6941 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
6942#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6943 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6944 {
6945 /*
6946 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
6947 *
6948         * @note A host register was assigned to the variable for the TlbLookup case above and must not
6949         *       be freed here, or the value loaded into that register will not be synced further down
6950         *       the road because the variable would no longer know it has a register assigned.
6951 *
6952 * @note For loads it is not required to sync what is in the assigned register with the stack slot
6953 * as it will be overwritten anyway.
6954 */
6955 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6956 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
6957 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
6958 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6959 }
6960 else
6961#endif
6962 if (enmOp == kIemNativeEmitMemOp_Store)
6963 {
6964 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6965 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
6966#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6967 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6968#else
6969 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
6970 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6971#endif
6972 }
6973
6974 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
6975 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
6976#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6977 fVolGregMask);
6978#else
6979 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
6980#endif
6981
6982 if (iSegReg != UINT8_MAX)
6983 {
6984 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
6985 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
6986 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
6987 }
6988
6989 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6990 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6991
6992 /* Done setting up parameters, make the call. */
6993 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6994
6995 /*
6996 * Put the result in the right register if this is a fetch.
6997 */
6998 if (enmOp != kIemNativeEmitMemOp_Store)
6999 {
7000#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7001 if ( cbMem == sizeof(RTUINT128U)
7002 || cbMem == sizeof(RTUINT256U))
7003 {
7004 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
7005
7006 /* Sync the value on the stack with the host register assigned to the variable. */
7007 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
7008 }
7009 else
7010#endif
7011 {
7012 Assert(idxRegValueFetch == pVarValue->idxReg);
7013 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
7014 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
7015 }
7016 }
7017
7018#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7019 /* Restore variables and guest shadow registers to volatile registers. */
7020 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7021 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7022#endif
7023
7024#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7025 if (pReNative->Core.offPc)
7026 {
7027 /*
7028 * Time to restore the program counter to its original value.
7029 */
7030 /* Allocate a temporary PC register. */
7031 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7032 kIemNativeGstRegUse_ForUpdate);
7033
7034 /* Restore the original value. */
7035 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7036 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7037
7038 /* Free and flush the PC register. */
7039 iemNativeRegFreeTmp(pReNative, idxPcReg);
7040 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7041 }
7042#endif
7043
7044#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7045 if (!TlbState.fSkip)
7046 {
7047 /* end of TlbMiss - Jump to the done label. */
7048 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7049 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7050
7051 /*
7052 * TlbLookup:
7053 */
7054 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl,
7055 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
7056 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
7057
7058 /*
7059 * Emit code to do the actual storing / fetching.
7060 */
7061 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7062# ifdef IEM_WITH_TLB_STATISTICS
7063 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7064 enmOp == kIemNativeEmitMemOp_Store
7065                                                  ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
7066                                                  : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
7067# endif
7068 switch (enmOp)
7069 {
7070 case kIemNativeEmitMemOp_Store:
7071 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
7072 {
7073 switch (cbMem)
7074 {
7075 case 1:
7076 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7077 break;
7078 case 2:
7079 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7080 break;
7081 case 4:
7082 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7083 break;
7084 case 8:
7085 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7086 break;
7087#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7088 case sizeof(RTUINT128U):
7089 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7090 break;
7091 case sizeof(RTUINT256U):
7092 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7093 break;
7094#endif
7095 default:
7096 AssertFailed();
7097 }
7098 }
7099 else
7100 {
7101 switch (cbMem)
7102 {
7103 case 1:
7104 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
7105 idxRegMemResult, TlbState.idxReg1);
7106 break;
7107 case 2:
7108 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7109 idxRegMemResult, TlbState.idxReg1);
7110 break;
7111 case 4:
7112 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7113 idxRegMemResult, TlbState.idxReg1);
7114 break;
7115 case 8:
7116 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
7117 idxRegMemResult, TlbState.idxReg1);
7118 break;
7119 default:
7120 AssertFailed();
7121 }
7122 }
7123 break;
7124
7125 case kIemNativeEmitMemOp_Fetch:
7126 case kIemNativeEmitMemOp_Fetch_Zx_U16:
7127 case kIemNativeEmitMemOp_Fetch_Zx_U32:
7128 case kIemNativeEmitMemOp_Fetch_Zx_U64:
7129 switch (cbMem)
7130 {
7131 case 1:
7132 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7133 break;
7134 case 2:
7135 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7136 break;
7137 case 4:
7138 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7139 break;
7140 case 8:
7141 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7142 break;
7143#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7144 case sizeof(RTUINT128U):
7145 /*
7146 * No need to sync back the register with the stack, this is done by the generic variable handling
7147 * code if there is a register assigned to a variable and the stack must be accessed.
7148 */
7149 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7150 break;
7151 case sizeof(RTUINT256U):
7152 /*
7153 * No need to sync back the register with the stack, this is done by the generic variable handling
7154 * code if there is a register assigned to a variable and the stack must be accessed.
7155 */
7156 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7157 break;
7158#endif
7159 default:
7160 AssertFailed();
7161 }
7162 break;
7163
7164 case kIemNativeEmitMemOp_Fetch_Sx_U16:
7165 Assert(cbMem == 1);
7166 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7167 break;
7168
7169 case kIemNativeEmitMemOp_Fetch_Sx_U32:
7170 Assert(cbMem == 1 || cbMem == 2);
7171 if (cbMem == 1)
7172 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7173 else
7174 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7175 break;
7176
7177 case kIemNativeEmitMemOp_Fetch_Sx_U64:
7178 switch (cbMem)
7179 {
7180 case 1:
7181 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7182 break;
7183 case 2:
7184 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7185 break;
7186 case 4:
7187 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7188 break;
7189 default:
7190 AssertFailed();
7191 }
7192 break;
7193
7194 default:
7195 AssertFailed();
7196 }
7197
7198 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7199
7200 /*
7201 * TlbDone:
7202 */
7203 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7204
7205 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7206
7207# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7208 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7209 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7210# endif
7211 }
7212#else
7213 RT_NOREF(fAlignMaskAndCtl, idxLabelTlbMiss);
7214#endif
7215
7216 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
7217 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7218 return off;
7219}
7220
7221
7222
7223/*********************************************************************************************************************************
7224* Memory fetches (IEM_MEM_FETCH_XXX). *
7225*********************************************************************************************************************************/
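/* Each IEM_MC_FETCH_MEM_* variant below expands to a call to iemNativeEmitMemFetchStoreDataCommon
   with the access size, the natural-alignment mask (plus any alignment-check flags), the fetch
   operation (with optional zero-/sign-extension) and the TLB-miss helper to invoke; the _FLAT_
   variants pass UINT8_MAX as the segment register for the flat address-space modes. */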
7226
7227/* 8-bit segmented: */
7228#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
7229 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
7230 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7231 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7232
7233#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7234 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7235 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7236 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7237
7238#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7239 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7240 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7241 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7242
7243#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7244 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7245 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7246 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7247
7248#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7249 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7250 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7251 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7252
7253#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7254 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7255 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7256 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7257
7258#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7259 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7260 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7261 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7262
7263/* 16-bit segmented: */
7264#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7265 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7266 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7267 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7268
7269#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7270 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7271 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7272 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7273
7274#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7275 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7276 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7277 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7278
7279#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7280 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7281 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7282 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7283
7284#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7285 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7286 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7287 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7288
7289#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7290 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7291 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7292 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7293
7294
7295/* 32-bit segmented: */
7296#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7297 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7298 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7299 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7300
7301#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7302 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7303 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7304 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7305
7306#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7307 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7308 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7309 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7310
7311#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7312 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7313 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7314 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7315
7316#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
7317 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
7318 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7319 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7320
7321#define IEM_MC_FETCH_MEM_I16_DISP(a_i16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7322 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
7323 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7324 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7325
7326#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
7327 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7328 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7329 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7330
7331#define IEM_MC_FETCH_MEM_I32_DISP(a_i32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7332 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7333 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7334 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7335
7336#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
7337 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
7338 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7339 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7340
7341AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
7342#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
7343 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
7344 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7345 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7346
7347
7348/* 64-bit segmented: */
7349#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7350 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7351 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7352 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7353
7354AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
7355#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
7356 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
7357 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7358 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7359
7360
7361/* 8-bit flat: */
7362#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7363 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
7364 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7365 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7366
7367#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7368 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7369 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7370 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7371
7372#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7373 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7374 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7375 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7376
7377#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7378 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7379 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7380 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7381
7382#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7383 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7384 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7385 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7386
7387#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7388 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7389 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7390 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7391
7392#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7393 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7394 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7395 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7396
7397
7398/* 16-bit flat: */
7399#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7400 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7401 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7402 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7403
7404#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7405 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7406 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7407 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7408
7409#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7410 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7411 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7412 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7413
7414#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7415 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7416 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7417 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7418
7419#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7420 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7421 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7422 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7423
7424#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7425 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7426 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7427 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7428
7429/* 32-bit flat: */
7430#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7431 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7432 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7433 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7434
7435#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7436 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7437 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7438 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7439
7440#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7441 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7442 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7443 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7444
7445#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7446 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7447 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7448 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7449
7450#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7451 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7452 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7453 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7454
7455#define IEM_MC_FETCH_MEM_FLAT_I16_DISP(a_i16Dst, a_GCPtrMem, a_offDisp) \
7456 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7457 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7458 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7459
7460#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7461 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7462 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7463 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7464
7465#define IEM_MC_FETCH_MEM_FLAT_I32_DISP(a_i32Dst, a_GCPtrMem, a_offDisp) \
7466 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7467 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7468 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7469
7470#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7471 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
7472 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7473 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7474
7475#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7476 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
7477 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7478 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7479
7480
7481/* 64-bit flat: */
7482#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7483 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7484 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7485 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7486
7487#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7488 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
7489 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7490 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7491
7492#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7493/* 128-bit segmented: */
7494#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7495 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7496 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7497 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7498
7499#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7500 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7501 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7502 kIemNativeEmitMemOp_Fetch, \
7503 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
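/* Note: the (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_XXX argument above is the fAlignMaskAndCtl
   value: the low bits are the alignment mask proper, while the IEM_MEMMAP_F_ALIGN_GP and
   IEM_MEMMAP_F_ALIGN_SSE bits tell the memory/TLB code how a misaligned access is to be treated
   (SSE style alignment checking raises #GP(0) rather than #AC). */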
7504
7505AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7506#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7507 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, sizeof(X86XMMREG), \
7508 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7509 kIemNativeEmitMemOp_Fetch, \
7510 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7511
7512#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7513 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7514 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7515 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7516
7517#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7518 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7519 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7520 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7521
7522
7523/* 128-bit flat: */
7524#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7525 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7526 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7527 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7528
7529#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7530 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7531 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7532 kIemNativeEmitMemOp_Fetch, \
7533 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7534
7535#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7536 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, sizeof(X86XMMREG), \
7537 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7538 kIemNativeEmitMemOp_Fetch, \
7539 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7540
7541#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7542 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7543 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7544 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7545
7546#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
7547 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
7548 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7549 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7550
7551/* 256-bit segmented: */
7552#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7553 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7554 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7555 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7556
7557#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7558 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7559 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7560 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7561
7562#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7563 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7564 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7565 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7566
7567#define IEM_MC_FETCH_MEM_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7568 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7569 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7570 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7571
7572
7573/* 256-bit flat: */
7574#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
7575 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7576 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7577 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7578
7579#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
7580 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7581 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7582 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7583
7584#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
7585 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
7586 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7587 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7588
7589#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
7590 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, \
7591 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7592 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7593
7594#endif
7595
7596
7597/*********************************************************************************************************************************
7598* Memory stores (IEM_MEM_STORE_XXX). *
7599*********************************************************************************************************************************/
7600
7601#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
7602 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
7603 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
7604 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7605
7606#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
7607 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
7608 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7609 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7610
7611#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
7612 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
7613 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7614 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7615
7616#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
7617 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
7618 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7619 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7620
7621
7622#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
7623 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
7624 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
7625 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7626
7627#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
7628 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
7629 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7630 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7631
7632#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
7633 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
7634 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7635 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7636
7637#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
7638 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
7639 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7640 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7641
7642
7643#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
7644 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7645 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7646
7647#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
7648 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7649 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7650
7651#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
7652 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7653 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7654
7655#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
7656 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7657 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7658
7659
7660#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
7661 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7662 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7663
7664#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
7665 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7666 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7667
7668#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
7669 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7670 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7671
7672#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
7673 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7674 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7675
7676/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
7677 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
7678DECL_INLINE_THROW(uint32_t)
7679iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
7680 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
7681{
7682 /*
7683 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
7684 * to do the grunt work.
7685 */
7686 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
7687 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
7688 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
7689 pfnFunction, idxInstr);
7690 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
7691 return off;
7692}
7693
7694
7695#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7696# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
7697 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7698 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7699 kIemNativeEmitMemOp_Store, \
7700 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
7701
7702# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
7703 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
7704 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7705 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
7706
7707# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
7708 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
7709 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7710 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
7711
7712# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
7713 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7714 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
7715 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7716
7717
7718# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
7719 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7720 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7721 kIemNativeEmitMemOp_Store, \
7722 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
7723
7724# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
7725 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
7726 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7727 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
7728
7729# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
7730 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
7731 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7732 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
7733
7734# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
7735 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
7736 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
7737 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7738#endif
7739
7740
7741
7742/*********************************************************************************************************************************
7743* Stack Accesses. *
7744*********************************************************************************************************************************/
7745/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
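/* Example: IEM_MC_FLAT64_PUSH_U64 passes RT_MAKE_U32_FROM_U8(64, 64, 0, 0); the emitter below decodes
   RT_BYTE1() as the operand width in bits (giving cbMem), RT_BYTE2() as the flat stack width
   (0 = not flat, i.e. SS-relative), and RT_BYTE3() as the segment-register-push flag. */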
7746#define IEM_MC_PUSH_U16(a_u16Value) \
7747 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7748 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
7749#define IEM_MC_PUSH_U32(a_u32Value) \
7750 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7751 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
7752#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
7753 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
7754 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
7755#define IEM_MC_PUSH_U64(a_u64Value) \
7756 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7757 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
7758
7759#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
7760 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7761 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7762#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
7763 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7764 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
7765#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
7766 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
7767 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
7768
7769#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
7770 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7771 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7772#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
7773 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7774 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
7775
7776
7777/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
7778DECL_INLINE_THROW(uint32_t)
7779iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
7780 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7781{
7782 /*
7783 * Assert sanity.
7784 */
7785 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7786 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7787#ifdef VBOX_STRICT
7788 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7789 {
7790 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7791 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7792 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7793 Assert( pfnFunction
7794 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7795 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
7796 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
7797 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7798 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
7799 : UINT64_C(0xc000b000a0009000) ));
7800 }
7801 else
7802 Assert( pfnFunction
7803 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
7804 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
7805 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
7806 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
7807 : UINT64_C(0xc000b000a0009000) ));
7808#endif
7809
7810#ifdef VBOX_STRICT
7811 /*
7812 * Check that the fExec flags we've got make sense.
7813 */
7814 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7815#endif
7816
7817 /*
7818 * To keep things simple we have to commit any pending writes first as we
7819 * may end up making calls.
7820 */
7821 /** @todo we could postpone this till we make the call and reload the
7822 * registers after returning from the call. Not sure if that's sensible or
7823 * not, though. */
7824 off = iemNativeRegFlushPendingWrites(pReNative, off);
7825
7826 /*
7827 * First we calculate the new RSP and the effective stack pointer value.
7828 * For 64-bit mode and flat 32-bit these two are the same.
7829 * (Code structure is very similar to that of PUSH)
7830 */
7831 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7832 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
7833 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
7834 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
7835 ? cbMem : sizeof(uint16_t);
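    /* Note: fIsIntelSeg reflects that intel CPUs only do a 16-bit write for wide segment register
       pushes outside 16-bit mode, so cbMemAccess (the bytes actually written) may be smaller than
       cbMem (the amount RSP is adjusted by).  The 16-bit mode case is handled separately by the
       EFLAGS merging code in the TLB-hit path below. */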
7836 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7837 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7838 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7839 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7840 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7841 if (cBitsFlat != 0)
7842 {
7843 Assert(idxRegEffSp == idxRegRsp);
7844 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7845 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7846 if (cBitsFlat == 64)
7847 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
7848 else
7849 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
7850 }
7851 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7852 {
7853 Assert(idxRegEffSp != idxRegRsp);
7854 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7855 kIemNativeGstRegUse_ReadOnly);
7856#ifdef RT_ARCH_AMD64
7857 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7858#else
7859 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7860#endif
7861 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7862 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7863 offFixupJumpToUseOtherBitSp = off;
7864 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7865 {
7866 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7867 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7868 }
7869 else
7870 {
7871 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7872 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7873 }
7874 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7875 }
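    /* The straight line code above handles the case where the SS.D bit matches the current CPU mode;
       if it doesn't, the conditional jump above lands in the Use16BitSp/Use32BitSp block emitted just
       after the TLB dispatch jump below, which updates RSP the other way and jumps back to SpUpdateEnd. */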
7876 /* SpUpdateEnd: */
7877 uint32_t const offLabelSpUpdateEnd = off;
7878
7879 /*
7880 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
7881 * we're skipping lookup).
7882 */
7883 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7884 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
7885 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7886 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7887 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7888 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7889 : UINT32_MAX;
7890 uint8_t const idxRegValue = !TlbState.fSkip
7891 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7892 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
7893 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
7894 : UINT8_MAX;
7895 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7896
7897
7898 if (!TlbState.fSkip)
7899 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7900 else
7901 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
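    /* Code layout from here on: the Use16BitSp/Use32BitSp fixup block (non-flat only, jumps back to
       SpUpdateEnd above), then the TlbMiss path that calls pfnFunction, and - if TLB lookup is
       enabled - the TlbLookup fast path that stores directly through the translated address; the two
       paths rejoin at TlbDone, after which the new RSP value is committed. */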
7902
7903 /*
7904 * Use16BitSp:
7905 */
7906 if (cBitsFlat == 0)
7907 {
7908#ifdef RT_ARCH_AMD64
7909 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7910#else
7911 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7912#endif
7913 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7914 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7915 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7916 else
7917 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7918 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7919 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7920 }
7921
7922 /*
7923 * TlbMiss:
7924 *
7925 * Call helper to do the pushing.
7926 */
7927 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7928
7929#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7930 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7931#else
7932 RT_NOREF(idxInstr);
7933#endif
7934
7935 /* Save variables in volatile registers. */
7936 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7937 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7938 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
7939 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
7940 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7941
7942 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
7943 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
7944 {
7945 /* Swap them using ARG0 as temp register: */
7946 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
7947 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
7948 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
7949 }
7950 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
7951 {
7952 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
7953 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
7954 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7955
7956 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
7957 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7958 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7959 }
7960 else
7961 {
7962 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
7963 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7964
7965 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
7966 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
7967 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
7968 }
7969
7970 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7971 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7972
7973 /* Done setting up parameters, make the call. */
7974 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7975
7976 /* Restore variables and guest shadow registers to volatile registers. */
7977 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7978 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7979
7980#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7981 if (!TlbState.fSkip)
7982 {
7983 /* end of TlbMiss - Jump to the done label. */
7984 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7985 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7986
7987 /*
7988 * TlbLookup:
7989 */
7990 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
7991 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7992
7993 /*
7994 * Emit code to do the actual storing / fetching.
7995 */
7996 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7997# ifdef IEM_WITH_TLB_STATISTICS
7998 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7999 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8000# endif
8001 if (idxRegValue != UINT8_MAX)
8002 {
8003 switch (cbMemAccess)
8004 {
8005 case 2:
8006 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8007 break;
8008 case 4:
8009 if (!fIsIntelSeg)
8010 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8011 else
8012 {
8013                        /* intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
8014                           PUSH FS in real mode, so we have to try to emulate that here.
8015                           We borrow the now unused idxReg1 from the TLB lookup code here. */
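                        /* Net effect: the dword written is (EFLAGS & 0xffff0000 & ~X86_EFL_RAZ_MASK)
                           ORed with the (zero extended) segment value, i.e. the selector ends up in
                           the low word and the masked high word of EFLAGS in the high word. */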
8016 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
8017 kIemNativeGstReg_EFlags);
8018 if (idxRegEfl != UINT8_MAX)
8019 {
8020#ifdef RT_ARCH_AMD64
8021 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
8022 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8023 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8024#else
8025 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
8026 off, TlbState.idxReg1, idxRegEfl,
8027 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8028#endif
8029 iemNativeRegFreeTmp(pReNative, idxRegEfl);
8030 }
8031 else
8032 {
8033 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
8034 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
8035 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8036 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8037 }
8038 /* ASSUMES the upper half of idxRegValue is ZERO. */
8039 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
8040 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
8041 }
8042 break;
8043 case 8:
8044 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8045 break;
8046 default:
8047 AssertFailed();
8048 }
8049 }
8050 else
8051 {
8052 switch (cbMemAccess)
8053 {
8054 case 2:
8055 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
8056 idxRegMemResult, TlbState.idxReg1);
8057 break;
8058 case 4:
8059 Assert(!fIsSegReg);
8060 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
8061 idxRegMemResult, TlbState.idxReg1);
8062 break;
8063 case 8:
8064 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
8065 break;
8066 default:
8067 AssertFailed();
8068 }
8069 }
8070
8071 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8072 TlbState.freeRegsAndReleaseVars(pReNative);
8073
8074 /*
8075 * TlbDone:
8076 *
8077 * Commit the new RSP value.
8078 */
8079 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8080 }
8081#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8082
8083#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8084 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
8085#endif
8086 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8087 if (idxRegEffSp != idxRegRsp)
8088 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8089
8090    /* The value variable is implicitly flushed. */
8091 if (idxRegValue != UINT8_MAX)
8092 iemNativeVarRegisterRelease(pReNative, idxVarValue);
8093 iemNativeVarFreeLocal(pReNative, idxVarValue);
8094
8095 return off;
8096}
8097
8098
8099
8100/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
8101#define IEM_MC_POP_GREG_U16(a_iGReg) \
8102 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
8103 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
8104#define IEM_MC_POP_GREG_U32(a_iGReg) \
8105 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
8106 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
8107#define IEM_MC_POP_GREG_U64(a_iGReg) \
8108 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
8109 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
8110
8111#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
8112 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
8113 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8114#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
8115 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
8116 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
8117
8118#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
8119 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
8120 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8121#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
8122 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
8123 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
8124
8125
8126DECL_FORCE_INLINE_THROW(uint32_t)
8127iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
8128 uint8_t idxRegTmp)
8129{
8130 /* Use16BitSp: */
8131#ifdef RT_ARCH_AMD64
8132 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8133 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
8134 RT_NOREF(idxRegTmp);
8135#else
8136 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
8137 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
8138 /* add tmp, regrsp, #cbMem */
8139 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
8140 /* and tmp, tmp, #0xffff */
8141 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
8142 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
8143    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
8144 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
8145#endif
8146 return off;
8147}
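/* Example: with RSP=0x0012fffe and cbMem=2 the value is read from SS:0xfffe and RSP ends up as
   0x00120000 - the effective address is the zero extended 16-bit SP, and only the low word of RSP
   is incremented (with wrap-around) while the upper bits are left untouched. */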
8148
8149
8150DECL_FORCE_INLINE(uint32_t)
8151iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
8152{
8153 /* Use32BitSp: */
8154 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8155 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
8156 return off;
8157}
8158
8159
8160/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
8161DECL_INLINE_THROW(uint32_t)
8162iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
8163 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
8164{
8165 /*
8166 * Assert sanity.
8167 */
8168 Assert(idxGReg < 16);
8169#ifdef VBOX_STRICT
8170 if (RT_BYTE2(cBitsVarAndFlat) != 0)
8171 {
8172 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8173 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8174 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8175 Assert( pfnFunction
8176 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8177 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
8178 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8179 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
8180 : UINT64_C(0xc000b000a0009000) ));
8181 }
8182 else
8183 Assert( pfnFunction
8184 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
8185 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
8186 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
8187 : UINT64_C(0xc000b000a0009000) ));
8188#endif
8189
8190#ifdef VBOX_STRICT
8191 /*
8192 * Check that the fExec flags we've got make sense.
8193 */
8194 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8195#endif
8196
8197 /*
8198 * To keep things simple we have to commit any pending writes first as we
8199 * may end up making calls.
8200 */
8201 off = iemNativeRegFlushPendingWrites(pReNative, off);
8202
8203 /*
8204     * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
8205 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
8206 * directly as the effective stack pointer.
8207 * (Code structure is very similar to that of PUSH)
8208 */
8209 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
8210 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
8211 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8212 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8213 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8214 /** @todo can do a better job picking the register here. For cbMem >= 4 this
8215 * will be the resulting register value. */
8216 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
8217
8218 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8219 if (cBitsFlat != 0)
8220 {
8221 Assert(idxRegEffSp == idxRegRsp);
8222 Assert(cBitsFlat == 32 || cBitsFlat == 64);
8223 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8224 }
8225 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8226 {
8227 Assert(idxRegEffSp != idxRegRsp);
8228 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8229 kIemNativeGstRegUse_ReadOnly);
8230#ifdef RT_ARCH_AMD64
8231 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8232#else
8233 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8234#endif
8235 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8236 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8237 offFixupJumpToUseOtherBitSp = off;
8238 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8239 {
8240/** @todo can skip idxRegRsp updating when popping ESP. */
8241 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8242 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8243 }
8244 else
8245 {
8246 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8247 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8248 }
8249 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8250 }
8251 /* SpUpdateEnd: */
8252 uint32_t const offLabelSpUpdateEnd = off;
8253
8254 /*
8255 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8256 * we're skipping lookup).
8257 */
8258 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8259 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
8260 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8261 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8262 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8263 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8264 : UINT32_MAX;
8265
8266 if (!TlbState.fSkip)
8267 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8268 else
8269 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8270
8271 /*
8272 * Use16BitSp:
8273 */
8274 if (cBitsFlat == 0)
8275 {
8276#ifdef RT_ARCH_AMD64
8277 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8278#else
8279 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8280#endif
8281 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8282 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8283 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8284 else
8285 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8286 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8287 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8288 }
8289
8290 /*
8291 * TlbMiss:
8292 *
8293     * Call helper to do the popping.
8294 */
8295 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8296
8297#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8298 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8299#else
8300 RT_NOREF(idxInstr);
8301#endif
8302
8303 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8304 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8305 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
8306 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8307
8308
8309 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
8310 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8311 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8312
8313 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8314 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8315
8316 /* Done setting up parameters, make the call. */
8317 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8318
8319 /* Move the return register content to idxRegMemResult. */
8320 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8321 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8322
8323 /* Restore variables and guest shadow registers to volatile registers. */
8324 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8325 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8326
8327#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8328 if (!TlbState.fSkip)
8329 {
8330 /* end of TlbMiss - Jump to the done label. */
8331 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8332 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8333
8334 /*
8335 * TlbLookup:
8336 */
8337 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
8338 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8339
8340 /*
8341         * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult itself).
8342 */
8343 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8344# ifdef IEM_WITH_TLB_STATISTICS
8345 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8346 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8347# endif
8348 switch (cbMem)
8349 {
8350 case 2:
8351 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8352 break;
8353 case 4:
8354 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8355 break;
8356 case 8:
8357 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8358 break;
8359 default:
8360 AssertFailed();
8361 }
8362
8363 TlbState.freeRegsAndReleaseVars(pReNative);
8364
8365 /*
8366 * TlbDone:
8367 *
8368     * Set the new RSP value (FLAT accesses need to calculate it first) and
8369 * commit the popped register value.
8370 */
8371 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8372 }
8373#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8374
8375 if (idxGReg != X86_GREG_xSP)
8376 {
8377 /* Set the register. */
8378 if (cbMem >= sizeof(uint32_t))
8379 {
8380#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8381 AssertMsg( pReNative->idxCurCall == 0
8382 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8383 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8384 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8385#endif
8386 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8387#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8388 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8389#endif
8390#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8391 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8392 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8393#endif
8394 }
8395 else
8396 {
8397 Assert(cbMem == sizeof(uint16_t));
8398 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8399 kIemNativeGstRegUse_ForUpdate);
8400 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8401#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8402 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8403#endif
8404 iemNativeRegFreeTmp(pReNative, idxRegDst);
8405 }
8406
8407 /* Complete RSP calculation for FLAT mode. */
8408 if (idxRegEffSp == idxRegRsp)
8409 {
8410 if (cBitsFlat == 64)
8411 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8412 else
8413 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8414 }
8415 }
8416 else
8417 {
8418        /* We're popping RSP, ESP or SP. Only the latter (SP) needs a bit of extra work, of course. */
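        /* For the 64-bit and 32-bit cases the loaded value simply becomes the new RSP, so no further
           stack pointer adjustment is needed; only the 16-bit POP SP needs to complete the still
           pending FLAT-mode RSP increment (non-FLAT already did it above) before merging the popped
           word into the low 16 bits of RSP. */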
8419 if (cbMem == sizeof(uint64_t))
8420 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8421 else if (cbMem == sizeof(uint32_t))
8422 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8423 else
8424 {
8425 if (idxRegEffSp == idxRegRsp)
8426 {
8427 if (cBitsFlat == 64)
8428 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8429 else
8430 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8431 }
8432 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8433 }
8434 }
8435
8436#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8437 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
8438#endif
8439
8440 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8441 if (idxRegEffSp != idxRegRsp)
8442 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8443 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8444
8445 return off;
8446}
8447
8448
8449
8450/*********************************************************************************************************************************
8451* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8452*********************************************************************************************************************************/
8453
8454#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8455 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8456 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8457 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8458
8459#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8460 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8461 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8462 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8463
8464#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8465 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8466 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8467 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8468
8469#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8470 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8471 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8472 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8473
8474
8475#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8476 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8477 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8478 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8479
8480#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8481 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8482 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8483 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8484
8485#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8486 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8487 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8488 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8489
8490#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8491 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8492 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8493 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8494
8495#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8496 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
8497 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8498 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8499
8500
8501#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8502 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8503 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8504 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8505
8506#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8507 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8508 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8509 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8510
8511#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8512 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8513 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8514 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8515
8516#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8517 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8518 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8519 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8520
8521#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8522 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
8523 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8524 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8525
8526
8527#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8528 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8529 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8530 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8531
8532#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8533 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8534 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8535 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8536#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8537 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8538 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8539 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8540
8541#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8542 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8543 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8544 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
8545
8546#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8547 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
8548 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8549 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8550
8551
8552#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8553 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8554 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8555 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
8556
8557#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8558 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8559 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8560 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
8561
8562
8563#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8564 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8565 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8566 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
8567
8568#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8569 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8570 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8571 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
8572
8573#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8574 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8575 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8576 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
8577
8578#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8579 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8580 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8581 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
8582
8583
8584
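/*
 * The IEM_MC_MEM_FLAT_MAP_XXX variants below are the segment-less counterparts of the
 * macros above: they pass UINT8_MAX as the segment register index and use the flat
 * helpers.  The common emitter asserts (in strict builds) that these are only used in
 * 64-bit or flat 32-bit execution modes.
 */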
8585#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8586 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8587 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8588 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
8589
8590#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8591 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8592 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8593 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
8594
8595#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8596 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8597 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8598 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
8599
8600#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8601 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8602 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8603 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
8604
8605
8606#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8607 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8608 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8609 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
8610
8611#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8612 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8613 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8614 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
8615
8616#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8617 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8618 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8619 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8620
8621#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8622 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8623 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8624 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
8625
8626#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
8627 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
8628 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8629 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8630
8631
8632#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8633 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8634 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8635 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
8636
8637#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8638 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8639 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8640 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
8641
8642#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8643 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8644 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8645 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8646
8647#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8648 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8649 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8650 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
8651
8652#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
8653 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
8654 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8655 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8656
8657
8658#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8659 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8660 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8661 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
8662
8663#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8664 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8665 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8666 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
8667
8668#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8669 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8670 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8671 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8672
8673#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8674 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8675 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8676 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
8677
8678#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
8679 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
8680 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8681 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8682
8683
8684#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
8685 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8686 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8687 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
8688
8689#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
8690 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8691 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8692 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
8693
8694
8695#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8696 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8697 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8698 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
8699
8700#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8701 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8702 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8703 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
8704
8705#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8706 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8707 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8708 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
8709
8710#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8711 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8712 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8713 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
8714
8715
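/**
 * Common emitter worker for the IEM_MC_MEM_MAP_XXX and IEM_MC_MEM_FLAT_MAP_XXX
 * statements above.
 *
 * Emits a TLB lookup for the mapping (when enabled) with a fallback call to the given
 * helper, returning the host mapping pointer in @a idxVarMem and the unmap info byte
 * in @a idxVarUnmapInfo.
 *
 * @returns New code buffer offset.
 * @param   pReNative           The native recompiler state.
 * @param   off                 Current code buffer offset.
 * @param   idxVarMem           Variable receiving the host mapping pointer.
 * @param   idxVarUnmapInfo     Variable receiving the unmap info consumed by
 *                              IEM_MC_MEM_COMMIT_AND_UNMAP_XXX.
 * @param   iSegReg             The segment register, UINT8_MAX for the flat variants.
 * @param   idxVarGCPtrMem      Variable holding the guest address.
 * @param   cbMem               The size of the access in bytes.
 * @param   fAccess             IEM_ACCESS_XXX flags.
 * @param   fAlignMaskAndCtl    Alignment mask (low bits) and control flags.
 * @param   pfnFunction         The TLB-miss helper to call.
 * @param   idxInstr            The instruction number (for statistics/diagnostics).
 */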
8716DECL_INLINE_THROW(uint32_t)
8717iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
8718 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint32_t fAlignMaskAndCtl,
8719 uintptr_t pfnFunction, uint8_t idxInstr)
8720{
8721 /*
8722 * Assert sanity.
8723 */
8724 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
8725 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
8726 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
8727 && pVarMem->cbVar == sizeof(void *),
8728 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8729
8730 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8731 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8732 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
8733 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
8734 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8735
8736 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
8737 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
8738 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
8739 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
8740 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8741
8742 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
8743
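    /* Note: the mapping helpers take up to four register arguments (pVCpu, pbUnmapInfo,
       GCPtrMem and, for the segmented variants, iSegReg), hence the check below. */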
8744 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
8745
8746#ifdef VBOX_STRICT
8747# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
8748 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
8749 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
8750 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
8751 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
8752# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
8753 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
8754 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
8755 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
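    /* These strict-build-only macros derive the expected helper entry point from the
       access flags by appending the Atomic/Rw/Ro/Wo suffix to the given base name,
       e.g. IEM_MAP_HLP_FN(IEM_ACCESS_DATA_RW, iemNativeHlpMemFlatMapDataU32) yields
       (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw.  The asserts below use them to check
       that the MC statement passed the pfnFunction matching cbMem and fAccess. */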
8756
8757 if (iSegReg == UINT8_MAX)
8758 {
8759 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8760 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8761 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8762 switch (cbMem)
8763 {
8764 case 1:
8765 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8));
8766 Assert(!fAlignMaskAndCtl);
8767 break;
8768 case 2:
8769 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16));
8770 Assert(fAlignMaskAndCtl < 2);
8771 break;
8772 case 4:
8773 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32));
8774 Assert(fAlignMaskAndCtl < 4);
8775 break;
8776 case 8:
8777 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64));
8778 Assert(fAlignMaskAndCtl < 8);
8779 break;
8780 case 10:
8781 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
8782 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
8783 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8784 Assert(fAlignMaskAndCtl < 8);
8785 break;
8786 case 16:
8787 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128));
8788 Assert(fAlignMaskAndCtl < 16);
8789 break;
8790# if 0
8791 case 32:
8792 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256));
8793 Assert(fAlignMaskAndCtl < 32);
8794 break;
8795 case 64:
8796 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512));
8797 Assert(fAlignMaskAndCtl < 64);
8798 break;
8799# endif
8800 default: AssertFailed(); break;
8801 }
8802 }
8803 else
8804 {
8805 Assert(iSegReg < 6);
8806 switch (cbMem)
8807 {
8808 case 1:
8809 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8));
8810 Assert(!fAlignMaskAndCtl);
8811 break;
8812 case 2:
8813 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16));
8814 Assert(fAlignMaskAndCtl < 2);
8815 break;
8816 case 4:
8817 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32));
8818 Assert(fAlignMaskAndCtl < 4);
8819 break;
8820 case 8:
8821 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64));
8822 Assert(fAlignMaskAndCtl < 8);
8823 break;
8824 case 10:
8825 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
8826 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
8827 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8828 Assert(fAlignMaskAndCtl < 8);
8829 break;
8830 case 16:
8831 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128));
8832 Assert(fAlignMaskAndCtl < 16);
8833 break;
8834# if 0
8835 case 32:
8836 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256));
8837 Assert(fAlignMaskAndCtl < 32);
8838 break;
8839 case 64:
8840 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512));
8841 Assert(fAlignMaskAndCtl < 64);
8842 break;
8843# endif
8844 default: AssertFailed(); break;
8845 }
8846 }
8847# undef IEM_MAP_HLP_FN
8848# undef IEM_MAP_HLP_FN_NO_AT
8849#endif
8850
8851#ifdef VBOX_STRICT
8852 /*
8853 * Check that the fExec flags we've got make sense.
8854 */
8855 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8856#endif
8857
8858 /*
8859 * To keep things simple we have to commit any pending writes first as we
8860 * may end up making calls.
8861 */
8862 off = iemNativeRegFlushPendingWrites(pReNative, off);
8863
8864#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8865 /*
8866 * Move/spill/flush stuff out of call-volatile registers.
8867 * This is the easy way out. We could contain this to the tlb-miss branch
8868 * by saving and restoring active stuff here.
8869 */
8870 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
8871 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
8872#endif
8873
8874 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
8875 while the tlb-miss code path will temporarily put it on the stack.
8876 Set the type to stack here so we don't need to do it twice below. */
8877 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
8878 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
8879 /** @todo use a tmp register from TlbState, since they'll be free after tlb
8880 * lookup is done. */
8881
8882 /*
8883 * Define labels and allocate the result register (trying for the return
8884 * register if we can).
8885 */
8886 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8887 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
8888 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
8889 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
8890 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
8891 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8892 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8893 : UINT32_MAX;
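    /*
     * Code layout emitted below: an unconditional jump to the TlbLookup label (defined
     * near the bottom of this function), then the TlbMiss code which calls the helper
     * and ends with a jump to TlbDone, and finally the TlbLookup code whose hit path
     * falls through into TlbDone.  If the lookup is skipped (TlbState.fSkip), only the
     * helper call is emitted.
     */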
8894
8895 /*
8896 * Jump to the TLB lookup code.
8897 */
8898 if (!TlbState.fSkip)
8899 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8900
8901 /*
8902 * TlbMiss:
8903 *
8904 * Call helper to do the fetching.
8905 * We flush all guest register shadow copies here.
8906 */
8907 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
8908
8909#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8910 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8911#else
8912 RT_NOREF(idxInstr);
8913#endif
8914
8915#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8916 /* Save variables in volatile registers. */
8917 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
8918 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8919#endif
8920
8921 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
8922 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
8923#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8924 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
8925#else
8926 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8927#endif
8928
8929 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
8930 if (iSegReg != UINT8_MAX)
8931 {
8932 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
8933 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
8934 }
8935
8936 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
8937 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
8938 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
8939
8940 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8941 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8942
8943 /* Done setting up parameters, make the call. */
8944 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8945
8946 /*
8947 * Put the output in the right registers.
8948 */
8949 Assert(idxRegMemResult == pVarMem->idxReg);
8950 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8951 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8952
8953#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8954 /* Restore variables and guest shadow registers to volatile registers. */
8955 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8956 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8957#endif
8958
8959 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
8960 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
8961
8962#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8963 if (!TlbState.fSkip)
8964 {
8965 /* end of tlbmiss - Jump to the done label. */
8966 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8967 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8968
8969 /*
8970 * TlbLookup:
8971 */
8972 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl, fAccess,
8973 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8974# ifdef IEM_WITH_TLB_STATISTICS
8975 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
8976 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
8977# endif
8978
8979 /* [idxVarUnmapInfo] = 0; */
8980 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
8981
8982 /*
8983 * TlbDone:
8984 */
8985 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8986
8987 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
8988
8989# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8990 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
8991 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8992# endif
8993 }
8994#else
8995 RT_NOREF(fAccess, fAlignMaskAndCtl, idxLabelTlbMiss);
8996#endif
8997
8998 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8999 iemNativeVarRegisterRelease(pReNative, idxVarMem);
9000
9001 return off;
9002}
9003
9004
9005#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
9006 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
9007 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
9008
9009#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
9010 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
9011 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
9012
9013#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
9014 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
9015 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
9016
9017#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
9018 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
9019 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
9020
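/**
 * Common emitter worker for the IEM_MC_MEM_COMMIT_AND_UNMAP_XXX statements above.
 *
 * Emits a check of the bUnmapInfo variable and only calls the given commit-and-unmap
 * helper when it is non-zero (the TLB-hit path of the map emitter sets it to zero).
 */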
9021DECL_INLINE_THROW(uint32_t)
9022iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
9023 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
9024{
9025 /*
9026 * Assert sanity.
9027 */
9028 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9029#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
9030 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9031#endif
9032 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
9033 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
9034 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
9035#ifdef VBOX_STRICT
9036 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
9037 {
9038 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
9039 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
9040 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
9041 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
9042 case IEM_ACCESS_TYPE_WRITE:
9043 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
9044 case IEM_ACCESS_TYPE_READ:
9045 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
9046 default: AssertFailed();
9047 }
9048#else
9049 RT_NOREF(fAccess);
9050#endif
9051
9052 /*
9053 * To keep things simple we have to commit any pending writes first as we
9054 * may end up making calls (there shouldn't be any at this point, so this
9055 * is just for consistency).
9056 */
9057 /** @todo we could postpone this till we make the call and reload the
9058 * registers after returning from the call. Not sure if that's sensible or
9059 * not, though. */
9060 off = iemNativeRegFlushPendingWrites(pReNative, off);
9061
9062 /*
9063 * Move/spill/flush stuff out of call-volatile registers.
9064 *
9065 * We exclude any register holding the bUnmapInfo variable, as we'll be
9066 * checking it after returning from the call and will free it afterwards.
9067 */
9068 /** @todo save+restore active registers and maybe guest shadows in miss
9069 * scenario. */
9070 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
9071 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
9072
9073 /*
9074 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
9075 * to call the unmap helper function.
9076 *
9077 * The likelihood of it being zero is higher than for the TLB hit when doing
9078 * the mapping, as a TLB miss for a well aligned and unproblematic memory
9079 * access should also end up with a mapping that won't need special unmapping.
9080 */
9081 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
9082 * should speed up things for the pure interpreter as well when TLBs
9083 * are enabled. */
9084#ifdef RT_ARCH_AMD64
9085 if (pVarUnmapInfo->idxReg == UINT8_MAX)
9086 {
9087 /* test byte [rbp - xxx], 0ffh */
9088 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9089 pbCodeBuf[off++] = 0xf6;
9090 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
9091 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9092 pbCodeBuf[off++] = 0xff;
9093 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9094 }
9095 else
9096#endif
9097 {
9098 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
9099 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
9100 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
9101 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9102 }
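    /* Emit a jz with a placeholder target; it is fixed up at the bottom of this
       function so that a zero bUnmapInfo skips the helper call entirely. */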
9103 uint32_t const offJmpFixup = off;
9104 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
9105
9106 /*
9107 * Call the unmap helper function.
9108 */
9109#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
9110 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9111#else
9112 RT_NOREF(idxInstr);
9113#endif
9114
9115 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
9116 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
9117 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9118
9119 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9120 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9121
9122 /* Done setting up parameters, make the call. */
9123 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
9124
9125 /* The bUnmapInfo variable is implicitly freed by these MCs. */
9126 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
9127
9128 /*
9129 * Done, just fixup the jump for the non-call case.
9130 */
9131 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
9132
9133 return off;
9134}
9135
9136
9137
9138/*********************************************************************************************************************************
9139* State and Exceptions *
9140*********************************************************************************************************************************/
9141
9142#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9143#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9144
9145#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9146#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9147#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9148
9149#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9150#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9151#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9152
9153
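/** Emits code for IEM_MC_ACTUALIZE_FPU/SSE/AVX_STATE_FOR_CHANGE/READ and
 *  IEM_MC_PREPARE_SSE_USAGE/IEM_MC_PREPARE_AVX_USAGE. */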
9154DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
9155{
9156#ifndef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
9157 RT_NOREF(pReNative, fForChange);
9158#else
9159 if ( !(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED)
9160 && fForChange)
9161 {
9162# ifdef RT_ARCH_AMD64
9163
9164 /* Need to save the host MXCSR the first time, and clear the exception flags. */
9165 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9166 {
9167 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9168
9169 /* stmxcsr */
9170 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9171 pbCodeBuf[off++] = X86_OP_REX_B;
9172 pbCodeBuf[off++] = 0x0f;
9173 pbCodeBuf[off++] = 0xae;
9174 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9175 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9176 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9177 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9178 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9179 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9180
9181 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9182 }
9183
9184 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
9185 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
9186
9187 /*
9188 * Mask any exceptions and clear the exception status and save into MXCSR,
9189 * taking a detour through memory here because ldmxcsr/stmxcsr don't support
9190 * a register source/target (sigh).
9191 */
9192 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr);
9193 off = iemNativeEmitOrGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, X86_MXCSR_XCPT_MASK);
9194 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, ~X86_MXCSR_XCPT_FLAGS);
9195 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9196
9197 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9198
9199 /* ldmxcsr */
9200 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9201 pbCodeBuf[off++] = X86_OP_REX_B;
9202 pbCodeBuf[off++] = 0x0f;
9203 pbCodeBuf[off++] = 0xae;
9204 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9205 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9206 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9207 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9208 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9209 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9210
9211 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9212 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9213
9214# elif defined(RT_ARCH_ARM64)
9215 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
9216
9217 /* Need to save the host floating point control register the first time, clear FPSR. */
9218 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9219 {
9220 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9221 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);
9222 pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9223 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9224 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9225 }
9226
9227 /*
9228 * Translate MXCSR to FPCR.
9229 *
9230 * Unfortunately we can't emulate the exact behavior of MXCSR as we can't take
9231 * FEAT_AFP on arm64 for granted (my M2 MacBook doesn't have it). So we can't map
9232 * MXCSR.DAZ to FPCR.FIZ and MXCSR.FZ to FPCR.FZ with FPCR.AH being set.
9233 * We can only use FPCR.FZ, which will flush input _and_ output de-normals to zero.
9234 */
9235 /** @todo Check the host supported flags (needs additional work to get the host features from CPUM)
9236 * and implement alternate handling if FEAT_AFP is present. */
9237 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
9238
9239 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
9240
9241 /* First make sure that there is nothing set for the upper 16-bits (X86_MXCSR_MM, which we don't emulate right now). */
9242 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegTmp, idxRegMxCsr);
9243
9244 /* If either MXCSR.FZ or MXCSR.DAZ is set FPCR.FZ will be set. */
9245 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(IEMNATIVE_REG_FIXED_TMP0, idxRegTmp, X86_MXCSR_DAZ_BIT, 1);
9246 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, X86_MXCSR_FZ_BIT);
9247 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(idxRegTmp, idxRegTmp, IEMNATIVE_REG_FIXED_TMP0);
9248 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegTmp, idxRegTmp, ARMV8_FPCR_FZ_BIT);
9249
9250 /*
9251 * Init the rounding mode, the layout differs between MXCSR.RM[14:13] and FPCR.RMode[23:22]:
9252 *
9253 * Value MXCSR FPCR
9254 * 0 RN RN
9255 * 1 R- R+
9256 * 2 R+ R-
9257 * 3 RZ RZ
9258 *
9259 * Conversion can be achieved by switching bit positions
9260 */
9261 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT);
9262 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 14, 1);
9263 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT + 1);
9264 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 13, 1);
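    /* Example: MXCSR.RC = 10b (round up) ends up as FPCR.RMode = 01b, which is the
       FPCR encoding for round up (cf. the table above). */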
9265
9266 /* Write the value to FPCR. */
9267 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9268
9269 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9270 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9271 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9272# else
9273# error "Port me"
9274# endif
9275 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
9276 }
9277#endif
9278 return off;
9279}
9280
9281
9282
9283/*********************************************************************************************************************************
9284* Emitters for FPU related operations. *
9285*********************************************************************************************************************************/
9286
9287#define IEM_MC_FETCH_FCW(a_u16Fcw) \
9288 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
9289
9290/** Emits code for IEM_MC_FETCH_FCW. */
9291DECL_INLINE_THROW(uint32_t)
9292iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9293{
9294 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9295 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9296
9297 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9298
9299 /* Allocate a temporary FCW register. */
9300 /** @todo eliminate extra register */
9301 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
9302 kIemNativeGstRegUse_ReadOnly);
9303
9304 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
9305
9306 /* Free but don't flush the FCW register. */
9307 iemNativeRegFreeTmp(pReNative, idxFcwReg);
9308 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9309
9310 return off;
9311}
9312
9313
9314#define IEM_MC_FETCH_FSW(a_u16Fsw) \
9315 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
9316
9317/** Emits code for IEM_MC_FETCH_FSW. */
9318DECL_INLINE_THROW(uint32_t)
9319iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9320{
9321 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9322 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9323
9324 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
9325 /* Allocate a temporary FSW register. */
9326 /** @todo eliminate extra register */
9327 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
9328 kIemNativeGstRegUse_ReadOnly);
9329
9330 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
9331
9332 /* Free but don't flush the FSW register. */
9333 iemNativeRegFreeTmp(pReNative, idxFswReg);
9334 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9335
9336 return off;
9337}
9338
9339
9340
9341#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9342
9343
9344/*********************************************************************************************************************************
9345* Emitters for SSE/AVX specific operations. *
9346*********************************************************************************************************************************/
9347
9348#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
9349 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
9350
9351/** Emits code for IEM_MC_COPY_XREG_U128. */
9352DECL_INLINE_THROW(uint32_t)
9353iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
9354{
9355 /* This is a nop if the source and destination register are the same. */
9356 if (iXRegDst != iXRegSrc)
9357 {
9358 /* Allocate destination and source register. */
9359 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
9360 kIemNativeGstSimdRegLdStSz_Low128,
9361 kIemNativeGstRegUse_ForFullWrite);
9362 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
9363 kIemNativeGstSimdRegLdStSz_Low128,
9364 kIemNativeGstRegUse_ReadOnly);
9365
9366 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9367
9368 /* Free but don't flush the source and destination register. */
9369 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9370 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9371 }
9372
9373 return off;
9374}
9375
9376
9377#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
9378 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
9379
9380/** Emits code for IEM_MC_FETCH_XREG_U128. */
9381DECL_INLINE_THROW(uint32_t)
9382iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
9383{
9384 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9385 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9386
9387 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9388 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9389
9390 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9391
9392 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9393
9394 /* Free but don't flush the source register. */
9395 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9396 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9397
9398 return off;
9399}
9400
9401
9402#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
9403 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
9404
9405#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
9406 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
9407
9408 /** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
9409DECL_INLINE_THROW(uint32_t)
9410iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
9411{
9412 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9413 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9414
9415 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9416 kIemNativeGstSimdRegLdStSz_Low128,
9417 kIemNativeGstRegUse_ReadOnly);
9418
9419 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9420 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9421
9422 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9423
9424 /* Free but don't flush the source register. */
9425 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9426 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9427
9428 return off;
9429}
9430
9431
9432#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
9433 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
9434
9435#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
9436 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
9437
9438/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
9439DECL_INLINE_THROW(uint32_t)
9440iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
9441{
9442 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9443 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9444
9445 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9446 kIemNativeGstSimdRegLdStSz_Low128,
9447 kIemNativeGstRegUse_ReadOnly);
9448
9449 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9450 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9451
9452 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9453
9454 /* Free but don't flush the source register. */
9455 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9456 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9457
9458 return off;
9459}
9460
9461
9462#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
9463 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
9464
9465/** Emits code for IEM_MC_FETCH_XREG_U16. */
9466DECL_INLINE_THROW(uint32_t)
9467iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
9468{
9469 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9470 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9471
9472 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9473 kIemNativeGstSimdRegLdStSz_Low128,
9474 kIemNativeGstRegUse_ReadOnly);
9475
9476 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9477 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9478
9479 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
9480
9481 /* Free but don't flush the source register. */
9482 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9483 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9484
9485 return off;
9486}
9487
9488
9489#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
9490 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
9491
9492/** Emits code for IEM_MC_FETCH_XREG_U8. */
9493DECL_INLINE_THROW(uint32_t)
9494iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
9495{
9496 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9497 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
9498
9499 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9500 kIemNativeGstSimdRegLdStSz_Low128,
9501 kIemNativeGstRegUse_ReadOnly);
9502
9503 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9504 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9505
9506 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
9507
9508 /* Free but don't flush the source register. */
9509 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9510 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9511
9512 return off;
9513}
9514
9515
9516#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
9517 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
9518
9519AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
9520#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
9521 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
9522
9523
9524/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9525DECL_INLINE_THROW(uint32_t)
9526iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9527{
9528 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9529 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9530
9531 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9532 kIemNativeGstSimdRegLdStSz_Low128,
9533 kIemNativeGstRegUse_ForFullWrite);
9534 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9535
9536 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9537
9538 /* Free but don't flush the source register. */
9539 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9540 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9541
9542 return off;
9543}
9544
9545
9546#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
9547 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
9548
9549#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
9550 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
9551
9552#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
9553 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
9554
9555#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
9556 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
9557
9558#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
9559 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
9560
9561#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
9562 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
9563
9564 /** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8 as well as IEM_MC_STORE_XREG_R32/IEM_MC_STORE_XREG_R64. */
9565DECL_INLINE_THROW(uint32_t)
9566iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
9567 uint8_t cbLocal, uint8_t iElem)
9568{
9569 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9570 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
9571
9572#ifdef VBOX_STRICT
9573 switch (cbLocal)
9574 {
9575 case sizeof(uint64_t): Assert(iElem < 2); break;
9576 case sizeof(uint32_t): Assert(iElem < 4); break;
9577 case sizeof(uint16_t): Assert(iElem < 8); break;
9578 case sizeof(uint8_t): Assert(iElem < 16); break;
9579 default: AssertFailed();
9580 }
9581#endif
9582
9583 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9584 kIemNativeGstSimdRegLdStSz_Low128,
9585 kIemNativeGstRegUse_ForUpdate);
9586 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9587
9588 switch (cbLocal)
9589 {
9590 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9591 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9592 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9593 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9594 default: AssertFailed();
9595 }
9596
9597 /* Free but don't flush the source register. */
9598 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9599 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9600
9601 return off;
9602}
9603
9604
9605#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
9606 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
9607
9608/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
9609DECL_INLINE_THROW(uint32_t)
9610iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9611{
9612 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9613 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9614
9615 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9616 kIemNativeGstSimdRegLdStSz_Low128,
9617 kIemNativeGstRegUse_ForUpdate);
9618 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9619
9620 /* Zero the vector register first, then store the 64-bit value to the lower 64-bit. */
9621 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9622 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9623
9624 /* Free but don't flush the source register. */
9625 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9626 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9627
9628 return off;
9629}
9630
9631
9632#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
9633 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
9634
9635/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
9636DECL_INLINE_THROW(uint32_t)
9637iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9638{
9639 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9640 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9641
9642 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9643 kIemNativeGstSimdRegLdStSz_Low128,
9644 kIemNativeGstRegUse_ForUpdate);
9645 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9646
9647 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
9648 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9649 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9650
9651 /* Free but don't flush the source register. */
9652 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9653 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9654
9655 return off;
9656}
9657
9658
9659#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
9660 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
9661
9662/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
9663DECL_INLINE_THROW(uint32_t)
9664iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
9665 uint8_t idxSrcVar, uint8_t iDwSrc)
9666{
9667 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9668 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9669
9670 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9671 kIemNativeGstSimdRegLdStSz_Low128,
9672 kIemNativeGstRegUse_ForUpdate);
9673 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9674
9675 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
9676 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
9677
9678 /* Free but don't flush the destination register. */
9679 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9680 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9681
9682 return off;
9683}
9684
9685
9686#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9687 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9688
9689/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
9690DECL_INLINE_THROW(uint32_t)
9691iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9692{
9693 /*
9694 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
9695 * if iYRegDst gets allocated first for the full write it won't load the
9696 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
9697 * duplicated from the already allocated host register for iYRegDst containing
9698 * garbage. This will be caught by the guest register value checking in debug
9699 * builds.
9700 */
9701 if (iYRegDst != iYRegSrc)
9702 {
9703 /* Allocate destination and source register. */
9704 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9705 kIemNativeGstSimdRegLdStSz_256,
9706 kIemNativeGstRegUse_ForFullWrite);
9707 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9708 kIemNativeGstSimdRegLdStSz_Low128,
9709 kIemNativeGstRegUse_ReadOnly);
9710
9711 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9712 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9713
9714 /* Free but don't flush the source and destination register. */
9715 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9716 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9717 }
9718 else
9719 {
9720 /* This effectively only clears the upper 128-bits of the register. */
9721 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9722 kIemNativeGstSimdRegLdStSz_High128,
9723 kIemNativeGstRegUse_ForFullWrite);
9724
9725 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9726
9727 /* Free but don't flush the destination register. */
9728 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9729 }
9730
9731 return off;
9732}
9733
9734
9735#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9736 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9737
9738/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
9739DECL_INLINE_THROW(uint32_t)
9740iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9741{
9742 /*
9743 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
9744 * if iYRegDst gets allocated first for the full write it won't load the
9745 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
9746 * duplicated from the already allocated host register for iYRegDst containing
9747 * garbage. This will be caught by the guest register value checking in debug
9748 * builds. iYRegSrc == iYRegDst would effectively only clear the upper 256 bits
9749 * for a zmm register we don't support yet, so this is just a nop.
9750 */
9751 if (iYRegDst != iYRegSrc)
9752 {
9753 /* Allocate destination and source register. */
9754 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9755 kIemNativeGstSimdRegLdStSz_256,
9756 kIemNativeGstRegUse_ReadOnly);
9757 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9758 kIemNativeGstSimdRegLdStSz_256,
9759 kIemNativeGstRegUse_ForFullWrite);
9760
9761 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9762
9763 /* Free but don't flush the source and destination register. */
9764 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9765 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9766 }
9767
9768 return off;
9769}
9770
9771
9772#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
9773 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
9774
9775/** Emits code for IEM_MC_FETCH_YREG_U128. */
9776DECL_INLINE_THROW(uint32_t)
9777iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
9778{
9779 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9780 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9781
9782 Assert(iDQWord <= 1);
9783 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9784 iDQWord == 1
9785 ? kIemNativeGstSimdRegLdStSz_High128
9786 : kIemNativeGstSimdRegLdStSz_Low128,
9787 kIemNativeGstRegUse_ReadOnly);
9788
9789 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9790 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9791
9792 if (iDQWord == 1)
9793 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9794 else
9795 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9796
9797 /* Free but don't flush the source register. */
9798 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9799 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9800
9801 return off;
9802}
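
/*
 * Illustrative sketch only: the fetch above selects either the low or the
 * high 128-bit half of the YMM register, depending on iDQWord (0 or 1).
 * Plain C equivalent, assuming a hypothetical paYmm[] array of RTUINT256U
 * values standing in for the CPUMCTX AVX state:
 */
#if 0
DECLINLINE(void) iemSketchFetchYregU128(PRTUINT128U pu128Dst, RTUINT256U const *paYmm, uint8_t iYReg, uint8_t iDQWord)
{
    pu128Dst->au64[0] = paYmm[iYReg].au64[iDQWord * 2];
    pu128Dst->au64[1] = paYmm[iYReg].au64[iDQWord * 2 + 1];
}
#endif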
9803
9804
9805#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
9806 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
9807
9808/** Emits code for IEM_MC_FETCH_YREG_U64. */
9809DECL_INLINE_THROW(uint32_t)
9810iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
9811{
9812 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9813 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9814
9815 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9816 iQWord >= 2
9817 ? kIemNativeGstSimdRegLdStSz_High128
9818 : kIemNativeGstSimdRegLdStSz_Low128,
9819 kIemNativeGstRegUse_ReadOnly);
9820
9821 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9822 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9823
9824 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9825
9826 /* Free but don't flush the source register. */
9827 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9828 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9829
9830 return off;
9831}
9832
9833
9834#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
9835 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
9836
9837/** Emits code for IEM_MC_FETCH_YREG_U32. */
9838DECL_INLINE_THROW(uint32_t)
9839iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
9840{
9841 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9842 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9843
9844 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9845 iDWord >= 4
9846 ? kIemNativeGstSimdRegLdStSz_High128
9847 : kIemNativeGstSimdRegLdStSz_Low128,
9848 kIemNativeGstRegUse_ReadOnly);
9849
9850 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9851 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9852
9853 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9854
9855 /* Free but don't flush the source register. */
9856 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9857 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9858
9859 return off;
9860}
9861
9862
9863#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
9864 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
9865
9866/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
9867DECL_INLINE_THROW(uint32_t)
9868iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
9869{
9870 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9871 kIemNativeGstSimdRegLdStSz_High128,
9872 kIemNativeGstRegUse_ForFullWrite);
9873
9874 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9875
9876 /* Free but don't flush the register. */
9877 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9878
9879 return off;
9880}
9881
9882
9883#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
9884 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
9885
9886/** Emits code for IEM_MC_STORE_YREG_U128. */
9887DECL_INLINE_THROW(uint32_t)
9888iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
9889{
9890 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9891 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9892
9893 Assert(iDQword <= 1);
9894 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9895 iDQword == 0
9896 ? kIemNativeGstSimdRegLdStSz_Low128
9897 : kIemNativeGstSimdRegLdStSz_High128,
9898 kIemNativeGstRegUse_ForFullWrite);
9899
9900 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9901
9902 if (iDQword == 0)
9903 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9904 else
9905 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
9906
9907 /* Free but don't flush the source register. */
9908 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9909 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9910
9911 return off;
9912}
9913
9914
9915#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9916 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9917
9918/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
9919DECL_INLINE_THROW(uint32_t)
9920iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9921{
9922 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9923 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9924
9925 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9926 kIemNativeGstSimdRegLdStSz_256,
9927 kIemNativeGstRegUse_ForFullWrite);
9928
9929 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9930
9931 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9932 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9933
9934 /* Free but don't flush the source register. */
9935 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9936 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9937
9938 return off;
9939}
9940
9941
9942#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
9943 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
9944
9945/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
9946DECL_INLINE_THROW(uint32_t)
9947iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9948{
9949 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9950 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9951
9952 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9953 kIemNativeGstSimdRegLdStSz_256,
9954 kIemNativeGstRegUse_ForFullWrite);
9955
9956 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9957
9958 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9959 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9960
9961 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9962 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9963
9964 return off;
9965}
9966
9967
9968#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
9969 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
9970
9971/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
9972DECL_INLINE_THROW(uint32_t)
9973iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9974{
9975 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9976 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9977
9978 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9979 kIemNativeGstSimdRegLdStSz_256,
9980 kIemNativeGstRegUse_ForFullWrite);
9981
9982 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9983
9984 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9985 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9986
9987 /* Free but don't flush the source register. */
9988 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9989 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9990
9991 return off;
9992}
9993
9994
9995#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
9996 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
9997
9998/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
9999DECL_INLINE_THROW(uint32_t)
10000iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10001{
10002 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10003 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10004
10005 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10006 kIemNativeGstSimdRegLdStSz_256,
10007 kIemNativeGstRegUse_ForFullWrite);
10008
10009 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10010
10011 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10012 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10013
10014 /* Free but don't flush the source register. */
10015 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10016 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10017
10018 return off;
10019}
10020
10021
10022#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
10023 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
10024
10025/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
10026DECL_INLINE_THROW(uint32_t)
10027iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10028{
10029 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10030 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10031
10032 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10033 kIemNativeGstSimdRegLdStSz_256,
10034 kIemNativeGstRegUse_ForFullWrite);
10035
10036 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10037
10038 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10039 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10040
10041 /* Free but don't flush the source register. */
10042 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10043 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10044
10045 return off;
10046}
10047
10048
10049#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
10050 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
10051
10052/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
10053DECL_INLINE_THROW(uint32_t)
10054iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10055{
10056 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10057 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10058
10059 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10060 kIemNativeGstSimdRegLdStSz_256,
10061 kIemNativeGstRegUse_ForFullWrite);
10062
10063 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10064
10065 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10066
10067 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10068 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10069
10070 return off;
10071}
10072
10073
10074#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
10075 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
10076
10077/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
10078DECL_INLINE_THROW(uint32_t)
10079iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10080{
10081 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10082 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10083
10084 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10085 kIemNativeGstSimdRegLdStSz_256,
10086 kIemNativeGstRegUse_ForFullWrite);
10087
10088 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10089
10090 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10091
10092 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10093 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10094
10095 return off;
10096}
10097
10098
10099#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10100 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10101
10102/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
10103DECL_INLINE_THROW(uint32_t)
10104iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10105{
10106 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10107 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10108
10109 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10110 kIemNativeGstSimdRegLdStSz_256,
10111 kIemNativeGstRegUse_ForFullWrite);
10112
10113 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10114
10115 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10116
10117 /* Free but don't flush the source register. */
10118 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10119 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10120
10121 return off;
10122}
10123
10124
10125#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10126 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10127
10128/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
10129DECL_INLINE_THROW(uint32_t)
10130iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10131{
10132 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10133 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10134
10135 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10136 kIemNativeGstSimdRegLdStSz_256,
10137 kIemNativeGstRegUse_ForFullWrite);
10138
10139 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10140
10141 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10142
10143 /* Free but don't flush the source register. */
10144 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10145 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10146
10147 return off;
10148}
10149
10150
10151#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10152 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10153
10154/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
10155DECL_INLINE_THROW(uint32_t)
10156iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10157{
10158 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10159 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10160
10161 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10162 kIemNativeGstSimdRegLdStSz_256,
10163 kIemNativeGstRegUse_ForFullWrite);
10164
10165 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10166
10167 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
10168
10169 /* Free but don't flush the source register. */
10170 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10171 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10172
10173 return off;
10174}
10175
10176
10177#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10178 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10179
10180/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
10181DECL_INLINE_THROW(uint32_t)
10182iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10183{
10184 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10185 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10186
10187 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10188 kIemNativeGstSimdRegLdStSz_256,
10189 kIemNativeGstRegUse_ForFullWrite);
10190
10191 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10192
10193 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10194 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
10195
10196 /* Free but don't flush the source register. */
10197 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10198 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10199
10200 return off;
10201}
10202
10203
10204#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10205 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10206
10207/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
10208DECL_INLINE_THROW(uint32_t)
10209iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10210{
10211 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10212 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10213
10214 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10215 kIemNativeGstSimdRegLdStSz_256,
10216 kIemNativeGstRegUse_ForFullWrite);
10217
10218 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10219
10220 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10221 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10222
10223 /* Free but don't flush the source register. */
10224 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10225 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10226
10227 return off;
10228}
10229
10230
10231#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
10232 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
10233
10234/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
10235DECL_INLINE_THROW(uint32_t)
10236iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
10237{
10238 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10239 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10240
10241 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10242 kIemNativeGstSimdRegLdStSz_256,
10243 kIemNativeGstRegUse_ForFullWrite);
10244 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10245 kIemNativeGstSimdRegLdStSz_Low128,
10246 kIemNativeGstRegUse_ReadOnly);
10247 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10248
10249 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10250 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10251 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10252
10253 /* Free but don't flush the source and destination registers. */
10254 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10255 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10256 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10257
10258 return off;
10259}
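
/*
 * Illustrative sketch only: the merge above builds the destination from the
 * local 64-bit value (low qword) and the high qword of the low 128 bits of
 * a_iYRegSrcHx, then zero-extends the upper half. Plain C equivalent using a
 * hypothetical paYmm[] RTUINT256U array and the local value u64Local:
 */
#if 0
DECLINLINE(void) iemSketchMergeYregU64LocalU64Hi(RTUINT256U *paYmm, uint8_t iYRegDst, uint64_t u64Local, uint8_t iYRegSrcHx)
{
    paYmm[iYRegDst].au64[0] = u64Local;
    paYmm[iYRegDst].au64[1] = paYmm[iYRegSrcHx].au64[1];
    paYmm[iYRegDst].au64[2] = 0;
    paYmm[iYRegDst].au64[3] = 0;
}
#endif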
10260
10261
10262#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
10263 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
10264
10265/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
10266DECL_INLINE_THROW(uint32_t)
10267iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
10268{
10269 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10270 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10271
10272 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10273 kIemNativeGstSimdRegLdStSz_256,
10274 kIemNativeGstRegUse_ForFullWrite);
10275 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10276 kIemNativeGstSimdRegLdStSz_Low128,
10277 kIemNativeGstRegUse_ReadOnly);
10278 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10279
10280 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10281 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
10282 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10283
10284 /* Free but don't flush the source and destination registers. */
10285 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10286 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10287 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10288
10289 return off;
10290}
10291
10292
10293#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
10294 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
10295
10296
10297/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
10298DECL_INLINE_THROW(uint32_t)
10299iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
10300{
10301 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10302 kIemNativeGstSimdRegLdStSz_Low128,
10303 kIemNativeGstRegUse_ForUpdate);
10304
10305 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
10306 if (bImm8Mask & RT_BIT(0))
10307 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
10308 if (bImm8Mask & RT_BIT(1))
10309 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
10310 if (bImm8Mask & RT_BIT(2))
10311 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
10312 if (bImm8Mask & RT_BIT(3))
10313 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
10314
10315 /* Free but don't flush the destination register. */
10316 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10317
10318 return off;
10319}
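
/*
 * Illustrative sketch only: each set bit 0..3 in the immediate mask clears
 * the corresponding dword of the XMM register, as the emitter above does one
 * element at a time. Plain C equivalent over a hypothetical paXmm[]
 * RTUINT128U array:
 */
#if 0
DECLINLINE(void) iemSketchClearXregU32Mask(RTUINT128U *paXmm, uint8_t iXReg, uint8_t bImm8Mask)
{
    for (unsigned iDWord = 0; iDWord < 4; iDWord++)
        if (bImm8Mask & RT_BIT(iDWord))
            paXmm[iXReg].au32[iDWord] = 0;
}
#endif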
10320
10321
10322#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
10323 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
10324
10325#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
10326 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
10327
10328/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
10329DECL_INLINE_THROW(uint32_t)
10330iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
10331{
10332 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10333 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
10334
10335 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10336 kIemNativeGstSimdRegLdStSz_256,
10337 kIemNativeGstRegUse_ReadOnly);
10338 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10339
10340 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
10341
10342 /* Free but don't flush the source register. */
10343 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10344 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10345
10346 return off;
10347}
10348
10349
10350#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
10351 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
10352
10353#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
10354 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
10355
10356/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
10357DECL_INLINE_THROW(uint32_t)
10358iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
10359{
10360 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10361 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10362
10363 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10364 kIemNativeGstSimdRegLdStSz_256,
10365 kIemNativeGstRegUse_ForFullWrite);
10366 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10367
10368 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
10369
10370 /* Free but don't flush the source register. */
10371 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10372 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10373
10374 return off;
10375}
10376
10377
10378#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
10379 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
10380
10381
10382/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
10383DECL_INLINE_THROW(uint32_t)
10384iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
10385 uint8_t idxSrcVar, uint8_t iDwSrc)
10386{
10387 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10388 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10389
10390 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10391 iDwDst < 4
10392 ? kIemNativeGstSimdRegLdStSz_Low128
10393 : kIemNativeGstSimdRegLdStSz_High128,
10394 kIemNativeGstRegUse_ForUpdate);
10395 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10396 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10397
10398 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
10399 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
10400
10401 /* Free but don't flush the source register. */
10402 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10403 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10404 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10405
10406 return off;
10407}
10408
10409
10410#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
10411 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
10412
10413
10414/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
10415DECL_INLINE_THROW(uint32_t)
10416iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
10417 uint8_t idxSrcVar, uint8_t iQwSrc)
10418{
10419 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10420 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10421
10422 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10423 iQwDst < 2
10424 ? kIemNativeGstSimdRegLdStSz_Low128
10425 : kIemNativeGstSimdRegLdStSz_High128,
10426 kIemNativeGstRegUse_ForUpdate);
10427 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10428 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10429
10430 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
10431 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
10432
10433 /* Free but don't flush the source register. */
10434 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10435 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10436 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10437
10438 return off;
10439}
10440
10441
10442#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
10443 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
10444
10445
10446/** Emits code for IEM_MC_STORE_YREG_U64. */
10447DECL_INLINE_THROW(uint32_t)
10448iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
10449{
10450 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10451 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10452
10453 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10454 iQwDst < 2
10455 ? kIemNativeGstSimdRegLdStSz_Low128
10456 : kIemNativeGstSimdRegLdStSz_High128,
10457 kIemNativeGstRegUse_ForUpdate);
10458
10459 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10460
10461 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
10462
10463 /* Free but don't flush the source register. */
10464 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10465 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10466
10467 return off;
10468}
10469
10470
10471#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
10472 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
10473
10474/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
10475DECL_INLINE_THROW(uint32_t)
10476iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10477{
10478 RT_NOREF(pReNative, iYReg);
10479 /** @todo Needs to be implemented when support for AVX-512 is added. */
10480 return off;
10481}
10482
10483
10484
10485/*********************************************************************************************************************************
10486* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
10487*********************************************************************************************************************************/
10488
10489/**
10490 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
10491 */
10492DECL_INLINE_THROW(uint32_t)
10493iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs, uint8_t idxInstr)
10494{
10495 /* Grab the MXCSR register; it must not be call-volatile or we would end up freeing it when setting up the call below. */
10496 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
10497 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
10498 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
10499
10500#if 0 /* This is not required right now as the called helper will set up the SSE/AVX state if it is an assembly one. */
10501 /*
10502 * Need to do the FPU preparation.
10503 */
10504 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
10505#endif
10506
10507 /*
10508 * Do all the call setup and cleanup.
10509 */
10510 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
10511 false /*fFlushPendingWrites*/);
10512
10513 /*
10514 * Load the MXCSR register into the first argument and mask out the current exception flags.
10515 */
10516 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
10517 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
10518
10519 /*
10520 * Make the call.
10521 */
10522 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
10523
10524 /*
10525 * The updated MXCSR is in the return register; update the exception status flags.
10526 *
10527 * The return register is marked allocated as a temporary because it is required for the
10528 * exception generation check below.
10529 */
10530 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG)));
10531 uint8_t const idxRegTmp = iemNativeRegMarkAllocated(pReNative, IEMNATIVE_CALL_RET_GREG, kIemNativeWhat_Tmp);
10532 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
10533
10534#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
10535 /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
10536 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
10537#endif
10538
10539 /*
10540 * Make sure we don't have any outstanding guest register writes as we may
10541 * raise an \#UD or \#XF, and all guest registers must be up to date in CPUMCTX.
10542 */
10543 off = iemNativeRegFlushPendingWrites(pReNative, off);
10544
10545#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10546 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10547#else
10548 RT_NOREF(idxInstr);
10549#endif
10550
10551 /** @todo r=aeichner ANDN from BMI1 would save us a temporary and an additional instruction here, but I don't
10552 * want to assume the existence of this instruction at the moment. */
10553 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
10554
10555 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp2, idxRegTmp);
10556 /* tmp &= X86_MXCSR_XCPT_MASK */
10557 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
10558 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
10559 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
10560 /* tmp = ~tmp */
10561 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
10562 /* tmp &= mxcsr */
10563 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegTmp2);
10564 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
10565 kIemNativeLabelType_RaiseSseAvxFpRelated);
10566
10567 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
10568 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10569 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
10570
10571 return off;
10572}
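
/*
 * Illustrative sketch only: plain C version of the post-call check emitted
 * above. uMxCsrRet is the MXCSR value returned by the assembly helper; any
 * exception flag that is not masked in MXCSR leads to the
 * RaiseSseAvxFpRelated exit (\#UD or \#XF, as noted in the emitter above).
 */
#if 0
DECLINLINE(bool) iemSketchMxCsrNeedsXcpt(uint32_t uMxCsrRet)
{
    uint32_t const fUnmasked = ~((uMxCsrRet & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT);
    return RT_BOOL(uMxCsrRet & fUnmasked & X86_MXCSR_XCPT_FLAGS);
}
#endif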
10573
10574
10575#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
10576 off = iemNativeEmitCallSseAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
10577
10578/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
10579DECL_INLINE_THROW(uint32_t)
10580iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10581{
10582 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10583 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10584 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
10585}
10586
10587
10588#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
10589 off = iemNativeEmitCallSseAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
10590
10591/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
10592DECL_INLINE_THROW(uint32_t)
10593iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
10594 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10595{
10596 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10597 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10598 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10599 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
10600}
10601
10602
10603/*********************************************************************************************************************************
10604* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
10605*********************************************************************************************************************************/
10606
10607#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
10608 off = iemNativeEmitCallAvxAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
10609
10610/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
10611DECL_INLINE_THROW(uint32_t)
10612iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10613{
10614 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10615 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10616 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
10617}
10618
10619
10620#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
10621 off = iemNativeEmitCallAvxAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
10622
10623/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
10624DECL_INLINE_THROW(uint32_t)
10625iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
10626 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10627{
10628 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10629 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10630 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10631 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
10632}
10633
10634
10635#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
10636
10637
10638/*********************************************************************************************************************************
10639* Include instruction emitters. *
10640*********************************************************************************************************************************/
10641#include "target-x86/IEMAllN8veEmit-x86.h"
10642