VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@104092

Last change on this file since 104092 was 104092, checked in by vboxsync, 11 months ago

VMM/IEM: Implement native emitters for IEM_MC_STORE_XREG_R32() and IEM_MC_STORE_XREG_R64(), bugref:10614

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 419.3 KB
1/* $Id: IEMAllN8veRecompFuncs.h 104092 2024-03-27 14:44:51Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused ones.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81
82
83/*********************************************************************************************************************************
84* Code emitters for flushing pending guest register writes and sanity checks *
85*********************************************************************************************************************************/
86
87#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
88# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
89DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
90{
91 /* Compare the shadow with the context value, they should match. */
92 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
93 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
94 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
95 return off;
96}
97# endif
98#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
99
100/**
101 * Flushes delayed write of a specific guest register.
102 *
103 * This must be called prior to calling CImpl functions and any helpers that use
104 * the guest state (like raising exceptions) and such.
105 *
106 * This optimization has not yet been implemented. The first target would be
107 * RIP updates, since these are the most common ones.
108 */
109DECL_INLINE_THROW(uint32_t)
110iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
111{
112#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
113 /* If, for whatever reason, it is possible to reference the PC register at some point, we need to do the writeback here first. */
114#endif
115
116#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
117#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
118 if ( enmClass == kIemNativeGstRegRef_EFlags
119 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
120 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
121#else
122 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
123#endif
124
125 if ( enmClass == kIemNativeGstRegRef_Gpr
126 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
127 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
128#endif
129
130#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
131 if ( enmClass == kIemNativeGstRegRef_XReg
132 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
133 {
134 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
135 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
136 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
137
138 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
139 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
140 }
141#endif
142 RT_NOREF(pReNative, enmClass, idxReg);
143 return off;
144}
145
146
147
148/*********************************************************************************************************************************
149* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
150*********************************************************************************************************************************/
151
152#undef IEM_MC_BEGIN /* unused */
153#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
154 { \
155 Assert(pReNative->Core.bmVars == 0); \
156 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
157 Assert(pReNative->Core.bmStack == 0); \
158 pReNative->fMc = (a_fMcFlags); \
159 pReNative->fCImpl = (a_fCImplFlags); \
160 pReNative->cArgsX = (a_cArgsIncludingHidden)
161
162/** We have to get to the end in recompilation mode, as otherwise we won't
163 * generate code for all the IEM_MC_IF_XXX branches. */
164#define IEM_MC_END() \
165 iemNativeVarFreeAll(pReNative); \
166 } return off
167
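/*
 * Illustrative note (hand-written sketch, not generated output; the flag value
 * and instruction length below are examples only, with pReNative, off and
 * pCallEntry assumed to be in scope): a recompiled MC block turns into a
 * straight sequence of these macros, with IEM_MC_BEGIN_EX opening the scope
 * and IEM_MC_END freeing all variables and returning the final code buffer
 * offset:
 *
 *      IEM_MC_BEGIN_EX(IEM_MC_F_MIN_386, 0, 0);   // fMcFlags, fCImplFlags, cArgsIncludingHidden
 *      IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(2, VINF_SUCCESS);
 *      IEM_MC_END();
 */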
168
169
170/*********************************************************************************************************************************
171* Native Emitter Support. *
172*********************************************************************************************************************************/
173
174#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
175
176#define IEM_MC_NATIVE_ELSE() } else {
177
178#define IEM_MC_NATIVE_ENDIF() } ((void)0)
179
180
181#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
182 off = a_fnEmitter(pReNative, off)
183
184#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
185 off = a_fnEmitter(pReNative, off, (a0))
186
187#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
188 off = a_fnEmitter(pReNative, off, (a0), (a1))
189
190#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
192
193#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
194 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
195
196#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
197 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
198
199#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
200 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
201
202#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
203 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
204
205#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
206 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
207
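/*
 * Illustrative note (hand-expanded example, not part of the original file; the
 * emitter name is hypothetical): the IEM_MC_NATIVE_EMIT_N macros simply pass N
 * arguments on to a native emitter function and thread the code buffer offset
 * through it, so
 *
 *      IEM_MC_NATIVE_EMIT_2(iemNativeEmit_example_r_r, idxVarDst, idxVarSrc);
 *
 * expands to
 *
 *      off = iemNativeEmit_example_r_r(pReNative, off, (idxVarDst), (idxVarSrc));
 */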
208
209
210/*********************************************************************************************************************************
211* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
212*********************************************************************************************************************************/
213
214#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
215 pReNative->fMc = 0; \
216 pReNative->fCImpl = (a_fFlags); \
217 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
218
219
220#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
221 pReNative->fMc = 0; \
222 pReNative->fCImpl = (a_fFlags); \
223 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
224
225DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
226 uint8_t idxInstr, uint64_t a_fGstShwFlush,
227 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
228{
229 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
230}
231
232
233#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
234 pReNative->fMc = 0; \
235 pReNative->fCImpl = (a_fFlags); \
236 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
237 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
238
239DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
240 uint8_t idxInstr, uint64_t a_fGstShwFlush,
241 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
242{
243 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
244}
245
246
247#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
248 pReNative->fMc = 0; \
249 pReNative->fCImpl = (a_fFlags); \
250 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
251 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
252
253DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
254 uint8_t idxInstr, uint64_t a_fGstShwFlush,
255 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
256 uint64_t uArg2)
257{
258 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
259}
260
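/*
 * Illustrative note (hypothetical example, not part of the original file; the
 * worker and flag names are placeholders): the IEM_MC_DEFER_TO_CIMPL_N_RET_THREADED
 * macros give up on MC-level recompilation for the instruction and emit a call
 * to its C implementation instead.  A use like
 *
 *      IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(cbInstr, IEM_CIMPL_F_STATUS_FLAGS, 0, iemCImpl_ExampleWorker, uArg0);
 *
 * ends up in iemNativeEmitCImplCall via iemNativeEmitCImplCall1 with an
 * argument count of 1 and the two unused argument slots passed as zero.
 */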
261
262
263/*********************************************************************************************************************************
264* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
265*********************************************************************************************************************************/
266
267/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
268 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
269DECL_INLINE_THROW(uint32_t)
270iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
271{
272 /*
273 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
274 * return with a special status code and make the execution loop deal with
275 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
276 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
277 * could continue w/o interruption, it probably will drop into the
278 * debugger, so it is not worth the effort of trying to service it here and we
279 * just lump it in with the handling of the others.
280 *
281 * To simplify the code and the register state management even more (wrt the
282 * immediate in the AND operation), we always update the flags and skip the
283 * extra check and its associated conditional jump.
284 */
285 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
286 <= UINT32_MAX);
287#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
288 AssertMsg( pReNative->idxCurCall == 0
289 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
290 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
291#endif
292
293 /*
294 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
295 * any pending register writes must be flushed.
296 */
297 off = iemNativeRegFlushPendingWrites(pReNative, off);
298
299 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
300 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
301 true /*fSkipLivenessAssert*/);
302 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
303 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
304 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
305 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
306 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
307
308 /* Free but don't flush the EFLAGS register. */
309 iemNativeRegFreeTmp(pReNative, idxEflReg);
310
311 return off;
312}
313
314
315/** The VINF_SUCCESS dummy. */
316template<int const a_rcNormal>
317DECL_FORCE_INLINE(uint32_t)
318iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
319{
320 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
321 if (a_rcNormal != VINF_SUCCESS)
322 {
323#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
324 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
325#else
326 RT_NOREF_PV(idxInstr);
327#endif
328
329 /* As this code returns from the TB any pending register writes must be flushed. */
330 off = iemNativeRegFlushPendingWrites(pReNative, off);
331
332 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
333 }
334 return off;
335}
336
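/*
 * Illustrative note (not part of the original file): the template parameter
 * makes this a compile-time decision.  For instance,
 * iemNativeEmitFinishInstructionWithStatus<VINF_SUCCESS>(pReNative, off, idxInstr)
 * emits nothing and returns 'off' unchanged, whereas the
 * <VINF_IEM_REEXEC_BREAK> instantiation records the instruction index (when
 * instruction counting is enabled), flushes pending register writes and emits
 * a jump to the ReturnBreak label.
 */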
337
338#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
339 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
340 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
341
342#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
343 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
344 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
345 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
346
347/** Same as iemRegAddToRip64AndFinishingNoFlags. */
348DECL_INLINE_THROW(uint32_t)
349iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
350{
351#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
352# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
353 if (!pReNative->Core.offPc)
354 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
355# endif
356
357 /* Allocate a temporary PC register. */
358 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
359
360 /* Perform the addition and store the result. */
361 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
362 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
363
364 /* Free but don't flush the PC register. */
365 iemNativeRegFreeTmp(pReNative, idxPcReg);
366#endif
367
368#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
369 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
370
371 pReNative->Core.offPc += cbInstr;
372# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
373 off = iemNativePcAdjustCheck(pReNative, off);
374# endif
375 if (pReNative->cCondDepth)
376 off = iemNativeEmitPcWriteback(pReNative, off);
377 else
378 pReNative->Core.cInstrPcUpdateSkipped++;
379#endif
380
381 return off;
382}
383
384
385#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
386 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
387 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
388
389#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
390 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
391 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
392 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
393
394/** Same as iemRegAddToEip32AndFinishingNoFlags. */
395DECL_INLINE_THROW(uint32_t)
396iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
397{
398#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
399# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
400 if (!pReNative->Core.offPc)
401 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
402# endif
403
404 /* Allocate a temporary PC register. */
405 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
406
407 /* Perform the addition and store the result. */
408 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
409 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
410
411 /* Free but don't flush the PC register. */
412 iemNativeRegFreeTmp(pReNative, idxPcReg);
413#endif
414
415#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
416 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
417
418 pReNative->Core.offPc += cbInstr;
419# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
420 off = iemNativePcAdjustCheck(pReNative, off);
421# endif
422 if (pReNative->cCondDepth)
423 off = iemNativeEmitPcWriteback(pReNative, off);
424 else
425 pReNative->Core.cInstrPcUpdateSkipped++;
426#endif
427
428 return off;
429}
430
431
432#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
433 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
434 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
435
436#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
437 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
438 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
439 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
440
441/** Same as iemRegAddToIp16AndFinishingNoFlags. */
442DECL_INLINE_THROW(uint32_t)
443iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
444{
445#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
446# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
447 if (!pReNative->Core.offPc)
448 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
449# endif
450
451 /* Allocate a temporary PC register. */
452 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
453
454 /* Perform the addition and store the result. */
455 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
456 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
457 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
458
459 /* Free but don't flush the PC register. */
460 iemNativeRegFreeTmp(pReNative, idxPcReg);
461#endif
462
463#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
464 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
465
466 pReNative->Core.offPc += cbInstr;
467# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
468 off = iemNativePcAdjustCheck(pReNative, off);
469# endif
470 if (pReNative->cCondDepth)
471 off = iemNativeEmitPcWriteback(pReNative, off);
472 else
473 pReNative->Core.cInstrPcUpdateSkipped++;
474#endif
475
476 return off;
477}
478
479
480
481/*********************************************************************************************************************************
482* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
483*********************************************************************************************************************************/
484
485#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
486 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
487 (a_enmEffOpSize), pCallEntry->idxInstr); \
488 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
489
490#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
491 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
492 (a_enmEffOpSize), pCallEntry->idxInstr); \
493 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
494 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
495
496#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
497 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
498 IEMMODE_16BIT, pCallEntry->idxInstr); \
499 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
500
501#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
502 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
503 IEMMODE_16BIT, pCallEntry->idxInstr); \
504 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
505 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
506
507#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
508 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
509 IEMMODE_64BIT, pCallEntry->idxInstr); \
510 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
511
512#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
513 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
514 IEMMODE_64BIT, pCallEntry->idxInstr); \
515 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
516 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
517
518/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
519 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
520 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
521DECL_INLINE_THROW(uint32_t)
522iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
523 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
524{
525 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
526
527 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
528 off = iemNativeRegFlushPendingWrites(pReNative, off);
529
530#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
531 Assert(pReNative->Core.offPc == 0);
532
533 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
534#endif
535
536 /* Allocate a temporary PC register. */
537 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
538
539 /* Perform the addition. */
540 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
541
542 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
543 {
544 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
545 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
546 }
547 else
548 {
549 /* Just truncate the result to 16-bit IP. */
550 Assert(enmEffOpSize == IEMMODE_16BIT);
551 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
552 }
553 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
554
555 /* Free but don't flush the PC register. */
556 iemNativeRegFreeTmp(pReNative, idxPcReg);
557
558 return off;
559}
560
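/*
 * Illustrative note (worked example, not part of the original file): for a
 * 64-bit 'jmp rel8' with cbInstr=2 and offDisp=-2 at RIP=0x1000, the code
 * emitted above computes 0x1000 + 2 + (-2) = 0x1000, checks that the result
 * is canonical (raising #GP(0) and exiting the TB if not), and stores it to
 * cpum.GstCtx.rip.  With a 16-bit effective operand size the result is
 * truncated to 16 bits instead of being canonicality-checked.
 */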
561
562#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
563 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
564 (a_enmEffOpSize), pCallEntry->idxInstr); \
565 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
566
567#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
568 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
569 (a_enmEffOpSize), pCallEntry->idxInstr); \
570 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
571 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
572
573#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
574 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
575 IEMMODE_16BIT, pCallEntry->idxInstr); \
576 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
577
578#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
579 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
580 IEMMODE_16BIT, pCallEntry->idxInstr); \
581 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
582 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
583
584#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
585 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
586 IEMMODE_32BIT, pCallEntry->idxInstr); \
587 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
588
589#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
590 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
591 IEMMODE_32BIT, pCallEntry->idxInstr); \
592 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
593 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
594
595/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
596 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
597 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
598DECL_INLINE_THROW(uint32_t)
599iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
600 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
601{
602 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
603
604 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
605 off = iemNativeRegFlushPendingWrites(pReNative, off);
606
607#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
608 Assert(pReNative->Core.offPc == 0);
609
610 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
611#endif
612
613 /* Allocate a temporary PC register. */
614 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
615
616 /* Perform the addition. */
617 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
618
619 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
620 if (enmEffOpSize == IEMMODE_16BIT)
621 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
622
623 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
624/** @todo we can skip this in 32-bit FLAT mode. */
625 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
626
627 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
628
629 /* Free but don't flush the PC register. */
630 iemNativeRegFreeTmp(pReNative, idxPcReg);
631
632 return off;
633}
634
635
636#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
637 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
638 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
639
640#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
641 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
642 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
643 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
644
645#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
646 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
647 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
648
649#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
650 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
651 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
652 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
653
654#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
655 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
656 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
657
658#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
659 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
660 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
661 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
662
663/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
664DECL_INLINE_THROW(uint32_t)
665iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
666 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
667{
668 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
669 off = iemNativeRegFlushPendingWrites(pReNative, off);
670
671#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
672 Assert(pReNative->Core.offPc == 0);
673
674 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
675#endif
676
677 /* Allocate a temporary PC register. */
678 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
679
680 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
681 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
682 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
683 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
684 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
685
686 /* Free but don't flush the PC register. */
687 iemNativeRegFreeTmp(pReNative, idxPcReg);
688
689 return off;
690}
691
692
693
694/*********************************************************************************************************************************
695 * Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
696*********************************************************************************************************************************/
697
698/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
699#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
700 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
701
702/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
703#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
704 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
705
706/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
707#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
708 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
709
710/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
711 * clears flags. */
712#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
713 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
714 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
715
716/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
717 * clears flags. */
718#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
719 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
720 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
721
722/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
723 * clears flags. */
724#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
725 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
726 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
727
728#undef IEM_MC_SET_RIP_U16_AND_FINISH
729
730
731/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
732#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
733 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
734
735/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
736#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
737 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
738
739/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
740 * clears flags. */
741#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
742 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
743 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
744
745/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
746 * and clears flags. */
747#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
748 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
749 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
750
751#undef IEM_MC_SET_RIP_U32_AND_FINISH
752
753
754/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
755#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
756 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
757
758/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
759 * and clears flags. */
760#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
761 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
762 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
763
764#undef IEM_MC_SET_RIP_U64_AND_FINISH
765
766
767/** Same as iemRegRipJumpU16AndFinishNoFlags,
768 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
769DECL_INLINE_THROW(uint32_t)
770iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
771 uint8_t idxInstr, uint8_t cbVar)
772{
773 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
774 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
775
776 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
777 off = iemNativeRegFlushPendingWrites(pReNative, off);
778
779#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
780 Assert(pReNative->Core.offPc == 0);
781
782 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
783#endif
784
785 /* Get a register with the new PC loaded from idxVarPc.
786 Note! This ASSUMES that the high bits of the GPR are zeroed. */
787 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
788
789 /* Check limit (may #GP(0) + exit TB). */
790 if (!f64Bit)
791/** @todo we can skip this test in FLAT 32-bit mode. */
792 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
793 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
794 else if (cbVar > sizeof(uint32_t))
795 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
796
797 /* Store the result. */
798 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
799
800 iemNativeVarRegisterRelease(pReNative, idxVarPc);
801 /** @todo implicitly free the variable? */
802
803 return off;
804}
805
806
807
808/*********************************************************************************************************************************
809* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
810*********************************************************************************************************************************/
811
812#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
813 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
814
815/**
816 * Emits code to check if a \#NM exception should be raised.
817 *
818 * @returns New code buffer offset, UINT32_MAX on failure.
819 * @param pReNative The native recompile state.
820 * @param off The code buffer offset.
821 * @param idxInstr The current instruction.
822 */
823DECL_INLINE_THROW(uint32_t)
824iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
825{
826#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
827 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
828
829 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
830 {
831#endif
832 /*
833 * Make sure we don't have any outstanding guest register writes as we may
834 * raise an #NM and all guest registers must be up to date in CPUMCTX.
835 */
836 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
837 off = iemNativeRegFlushPendingWrites(pReNative, off);
838
839#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
840 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
841#else
842 RT_NOREF(idxInstr);
843#endif
844
845 /* Allocate a temporary CR0 register. */
846 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
847 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
848
849 /*
850 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
851 * return raisexcpt();
852 */
853 /* Test and jump. */
854 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
855
856 /* Free but don't flush the CR0 register. */
857 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
858
859#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
860 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
861 }
862 else
863 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
864#endif
865
866 return off;
867}
868
869
870#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
871 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
872
873/**
874 * Emits code to check if a \#MF exception should be raised.
875 *
876 * @returns New code buffer offset, UINT32_MAX on failure.
877 * @param pReNative The native recompile state.
878 * @param off The code buffer offset.
879 * @param idxInstr The current instruction.
880 */
881DECL_INLINE_THROW(uint32_t)
882iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
883{
884 /*
885 * Make sure we don't have any outstanding guest register writes as we may
886 * raise an #MF and all guest registers must be up to date in CPUMCTX.
887 */
888 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
889 off = iemNativeRegFlushPendingWrites(pReNative, off);
890
891#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
892 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
893#else
894 RT_NOREF(idxInstr);
895#endif
896
897 /* Allocate a temporary FSW register. */
898 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
899 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
900
901 /*
902 * if ((FSW & X86_FSW_ES) != 0)
903 * return raisexcpt();
904 */
905 /* Test and jump. */
906 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFpuFswReg, X86_FSW_ES, idxLabelRaiseMf);
907
908 /* Free but don't flush the FSW register. */
909 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
910
911 return off;
912}
913
914
915#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
916 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
917
918/**
919 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
920 *
921 * @returns New code buffer offset, UINT32_MAX on failure.
922 * @param pReNative The native recompile state.
923 * @param off The code buffer offset.
924 * @param idxInstr The current instruction.
925 */
926DECL_INLINE_THROW(uint32_t)
927iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
928{
929#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
930 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
931
932 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
933 {
934#endif
935 /*
936 * Make sure we don't have any outstanding guest register writes as we may
937 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
938 */
939 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
940 off = iemNativeRegFlushPendingWrites(pReNative, off);
941
942#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
943 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
944#else
945 RT_NOREF(idxInstr);
946#endif
947
948 /* Allocate a temporary CR0 and CR4 register. */
949 uint8_t const idxLabelRaiseSseRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseRelated);
950 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
951 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
952 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
953
954 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
955#ifdef RT_ARCH_AMD64
956 /*
957 * We do a modified test here:
958 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
959 * else { goto RaiseSseRelated; }
960 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
961 * all targets except the 386, which doesn't support SSE, so this should
962 * be a safe assumption.
963 */
964 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
965 //pCodeBuf[off++] = 0xcc;
966 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
967 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
968 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
969 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
970 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
971 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseSseRelated, kIemNativeInstrCond_ne);
972
973#elif defined(RT_ARCH_ARM64)
974 /*
975 * We do a modified test here:
976 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
977 * else { goto RaiseSseRelated; }
978 */
979 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
980 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
981 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
982 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
983 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
984 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
985 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
986 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
987 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
988 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
989 idxLabelRaiseSseRelated);
990
991#else
992# error "Port me!"
993#endif
994
995 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
996 iemNativeRegFreeTmp(pReNative, idxTmpReg);
997 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
998 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
999
1000#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1001 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
1002 }
1003 else
1004 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
1005#endif
1006
1007 return off;
1008}
1009
1010
1011#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
1012 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
1013
1014/**
1015 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
1016 *
1017 * @returns New code buffer offset, UINT32_MAX on failure.
1018 * @param pReNative The native recompile state.
1019 * @param off The code buffer offset.
1020 * @param idxInstr The current instruction.
1021 */
1022DECL_INLINE_THROW(uint32_t)
1023iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1024{
1025#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1026 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
1027
1028 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
1029 {
1030#endif
1031 /*
1032 * Make sure we don't have any outstanding guest register writes as we may
1033 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
1034 */
1035 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
1036 off = iemNativeRegFlushPendingWrites(pReNative, off);
1037
1038#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1039 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1040#else
1041 RT_NOREF(idxInstr);
1042#endif
1043
1044 /* Allocate a temporary CR0, CR4 and XCR0 register. */
1045 uint8_t const idxLabelRaiseAvxRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseAvxRelated);
1046 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
1047 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
1048 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
1049 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1050
1051 /*
1052 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
1053 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
1054 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
1055 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
1056 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
1057 * { likely }
1058 * else { goto RaiseAvxRelated; }
1059 */
1060#ifdef RT_ARCH_AMD64
1061 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
1062 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
1063 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
1064 ^ 0x1a) ) { likely }
1065 else { goto RaiseAvxRelated; } */
1066 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
1067 //pCodeBuf[off++] = 0xcc;
1068 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
1069 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
1070 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
1071 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1072 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
1073 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
1074 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1075 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
1076 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
1077 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
1078 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseAvxRelated, kIemNativeInstrCond_ne);
1079
1080#elif defined(RT_ARCH_ARM64)
1081 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
1082 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
1083 else { goto RaiseAvxRelated; } */
1084 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
1085 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
1086 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
1087 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
1088 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
1089 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
1090 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
1091 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
1092 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
1093 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
1094 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
1095 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
1096 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
1097 idxLabelRaiseAvxRelated);
1098
1099#else
1100# error "Port me!"
1101#endif
1102
1103 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1104 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1105 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1106 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
1107#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1108 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
1109 }
1110 else
1111 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
1112#endif
1113
1114 return off;
1115}
1116
1117
1118#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1119#define IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
1120 off = iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(pReNative, off)
1121
1122/** Emits code for IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT. */
1123DECL_INLINE_THROW(uint32_t)
1124iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1125{
1126 uint8_t const idxLabelRaiseSseAvxFpRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseAvxFpRelated);
1127 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
1128 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
1129
1130 /* mov tmp, varmxcsr */
1131 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
1132 /* tmp &= X86_MXCSR_XCPT_MASK */
1133 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
1134 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
1135 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
1136 /* tmp = ~tmp */
1137 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
1138 /* tmp &= mxcsr */
1139 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
1140 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
1141 idxLabelRaiseSseAvxFpRelated);
1142
1143 /* Free but don't flush the MXCSR register. */
1144 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
1145 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1146
1147 return off;
1148}
1149#endif
1150
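/*
 * Illustrative note (worked example, not part of the original file), assuming
 * the usual MXCSR layout with exception flags in bits 0..5, exception masks in
 * bits 7..12 and X86_MXCSR_XCPT_MASK_SHIFT being 7:
 *  - MXCSR = 0x1f80 (all exceptions masked, no flags pending): the shifted
 *    mask is 0x3f, its inverse clears bits 0..5, so the final test against
 *    X86_MXCSR_XCPT_FLAGS is zero and the jump is not taken.
 *  - MXCSR = 0x1d84 (ZM clear, ZE pending): bit 2 survives the masking, the
 *    test is non-zero and execution jumps to the RaiseSseAvxFpRelated label.
 */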
1151
1152#define IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
1153 off = iemNativeEmitRaiseSseAvxSimdFpXcpt(pReNative, off, pCallEntry->idxInstr)
1154
1155/**
1156 * Emits code to raise a SIMD floating-point exception (either \#UD or \#XF).
1157 *
1158 * @returns New code buffer offset, UINT32_MAX on failure.
1159 * @param pReNative The native recompile state.
1160 * @param off The code buffer offset.
1161 * @param idxInstr The current instruction.
1162 */
1163DECL_INLINE_THROW(uint32_t)
1164iemNativeEmitRaiseSseAvxSimdFpXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1165{
1166 /*
1167 * Make sure we don't have any outstanding guest register writes as we may
1168 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
1169 */
1170 off = iemNativeRegFlushPendingWrites(pReNative, off);
1171
1172#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1173 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1174#else
1175 RT_NOREF(idxInstr);
1176#endif
1177
1178 /* Allocate a temporary CR4 register. */
1179 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
1180 uint8_t const idxLabelRaiseXf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseXf);
1181 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
1182
1183 /*
1184 * if (!(cr4 & X86_CR4_OSXMMEEXCPT))
1185 * return raisexcpt();
1186 */
1187 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXMMEEXCPT_BIT, idxLabelRaiseUd);
1188
1189 /* raise \#XF exception unconditionally. */
1190 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseXf);
1191
1192 /* Free but don't flush the CR4 register. */
1193 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1194
1195 return off;
1196}
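/* Illustrative sketch (comment only): together with the CR4 test above this
 * reproduces the architectural rule that an unmasked SIMD FP exception is
 * delivered as \#XF only when the OS has opted in via CR4.OSXMMEXCPT and as
 * \#UD otherwise, i.e. roughly (helper names as used elsewhere in IEM):
 *
 *      if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
 *          return iemRaiseSimdFpException(pVCpu);   // #XF
 *      return iemRaiseUndefinedOpcode(pVCpu);       // #UD
 */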
1197
1198
1199#define IEM_MC_RAISE_DIVIDE_ERROR() \
1200 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
1201
1202/**
1203 * Emits code to raise a \#DE.
1204 *
1205 * @returns New code buffer offset, UINT32_MAX on failure.
1206 * @param pReNative The native recompile state.
1207 * @param off The code buffer offset.
1208 * @param idxInstr The current instruction.
1209 */
1210DECL_INLINE_THROW(uint32_t)
1211iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1212{
1213 /*
1214 * Make sure we don't have any outstanding guest register writes as we may raise a \#DE and all guest registers must be up to date in CPUMCTX.
1215 */
1216 off = iemNativeRegFlushPendingWrites(pReNative, off);
1217
1218#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1219 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1220#else
1221 RT_NOREF(idxInstr);
1222#endif
1223
1224 uint8_t const idxLabelRaiseDe = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseDe);
1225
1226 /* raise \#DE exception unconditionally. */
1227 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseDe);
1228
1229 return off;
1230}
1231
1232
1233/*********************************************************************************************************************************
1234* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
1235*********************************************************************************************************************************/
1236
1237/**
1238 * Pushes an IEM_MC_IF_XXX onto the condition stack.
1239 *
1240 * @returns Pointer to the condition stack entry on success; throws
1241 * VERR_IEM_COND_TOO_DEEPLY_NESTED if the nesting is too deep.
1242 */
1243DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
1244{
1245#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1246 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
1247#endif
1248
1249 uint32_t const idxStack = pReNative->cCondDepth;
1250 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
1251
1252 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
1253 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
1254
1255 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
1256 pEntry->fInElse = false;
1257 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
1258 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
1259
1260 return pEntry;
1261}
1262
1263
1264/**
1265 * Start of the if-block, snapshotting the register and variable state.
1266 */
1267DECL_INLINE_THROW(void)
1268iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
1269{
1270 Assert(offIfBlock != UINT32_MAX);
1271 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1272 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1273 Assert(!pEntry->fInElse);
1274
1275 /* Define the start of the IF block if requested or for disassembly purposes. */
1276 if (idxLabelIf != UINT32_MAX)
1277 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
1278#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1279 else
1280 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
1281#else
1282 RT_NOREF(offIfBlock);
1283#endif
1284
1285#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1286 Assert(pReNative->Core.offPc == 0);
1287#endif
1288
1289 /* Copy the initial state so we can restore it in the 'else' block. */
1290 pEntry->InitialState = pReNative->Core;
1291}
1292
1293
1294#define IEM_MC_ELSE() } while (0); \
1295 off = iemNativeEmitElse(pReNative, off); \
1296 do {
1297
1298/** Emits code related to IEM_MC_ELSE. */
1299DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1300{
1301 /* Check sanity and get the conditional stack entry. */
1302 Assert(off != UINT32_MAX);
1303 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1304 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1305 Assert(!pEntry->fInElse);
1306
1307#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1308 /* Writeback any dirty shadow registers. */
1309 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
1310 * in one of the branches and leave guest registers already dirty before the start of the if
1311 * block alone. */
1312 off = iemNativeRegFlushDirtyGuest(pReNative, off);
1313#endif
1314
1315 /* Jump to the endif */
1316 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
1317
1318 /* Define the else label and enter the else part of the condition. */
1319 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1320 pEntry->fInElse = true;
1321
1322#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1323 Assert(pReNative->Core.offPc == 0);
1324#endif
1325
1326 /* Snapshot the core state so we can do a merge at the endif and restore
1327 the snapshot we took at the start of the if-block. */
1328 pEntry->IfFinalState = pReNative->Core;
1329 pReNative->Core = pEntry->InitialState;
1330
1331 return off;
1332}
1333
1334
1335#define IEM_MC_ENDIF() } while (0); \
1336 off = iemNativeEmitEndIf(pReNative, off)
1337
1338/** Emits code related to IEM_MC_ENDIF. */
1339DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1340{
1341 /* Check sanity and get the conditional stack entry. */
1342 Assert(off != UINT32_MAX);
1343 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1344 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1345
1346#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1347 Assert(pReNative->Core.offPc == 0);
1348#endif
1349#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1350 /* Writeback any dirty shadow registers (else branch). */
1351 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
1352 * in one of the branches and leave guest registers already dirty before the start of the if
1353 * block alone. */
1354 off = iemNativeRegFlushDirtyGuest(pReNative, off);
1355#endif
1356
1357 /*
1358 * Now we have to find common ground between the current core state and the
1359 * one at the end of the other branch. Use the smallest common denominator
1360 * and just drop anything that isn't the same in both states.
1361 */
1362 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
1363 * which is why we're doing this at the end of the else-block.
1364 * But we'd need more info about the future for that to be worth the effort. */
1365 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
1366#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1367 Assert( pOther->bmGstRegShadowDirty == 0
1368 && pReNative->Core.bmGstRegShadowDirty == 0);
1369#endif
1370
1371 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
1372 {
1373 /* shadow guest stuff first. */
1374 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
1375 if (fGstRegs)
1376 {
1377 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
1378 do
1379 {
1380 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
1381 fGstRegs &= ~RT_BIT_64(idxGstReg);
1382
1383 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
1384 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
1385 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
1386 {
1387 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
1388 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
1389
1390#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1391 /* Writeback any dirty shadow registers we are about to unshadow. */
1392 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
1393#endif
1394 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
1395 }
1396 } while (fGstRegs);
1397 }
1398 else
1399 {
1400 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
1401#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1402 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1403#endif
1404 }
1405
1406 /* Check variables next. For now we must require them to be identical
1407 or stuff we can recreate. */
1408 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
1409 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
1410 if (fVars)
1411 {
1412 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
1413 do
1414 {
1415 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
1416 fVars &= ~RT_BIT_32(idxVar);
1417
1418 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
1419 {
1420 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
1421 continue;
1422 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
1423 {
1424 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1425 if (idxHstReg != UINT8_MAX)
1426 {
1427 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1428 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1429 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
1430 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1431 }
1432 continue;
1433 }
1434 }
1435 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
1436 continue;
1437
1438 /* Irreconcilable, so drop it. */
1439 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1440 if (idxHstReg != UINT8_MAX)
1441 {
1442 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1443 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1444 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
1445 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1446 }
1447 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1448 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
1449 } while (fVars);
1450 }
1451
1452 /* Finally, check that the host register allocations matches. */
1453 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
1454 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
1455 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
1456 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
1457 }
1458
1459 /*
1460 * Define the endif label and maybe the else one if we're still in the 'if' part.
1461 */
1462 if (!pEntry->fInElse)
1463 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1464 else
1465 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
1466 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
1467
1468 /* Pop the conditional stack. */
1469 pReNative->cCondDepth -= 1;
1470
1471 return off;
1472}
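/* Illustrative sketch of how the conditional pieces above are meant to be used
 * in recompiled microcode (hypothetical fragment, any IEM_MC_IF_XXX variant
 * defined below works the same way):
 *
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *          ...   // if-block, entered with the state snapshotted by iemNativeCondStartIfBlock
 *      } IEM_MC_ELSE() {
 *          ...   // else-block, starts again from the snapshot taken at the IF
 *      } IEM_MC_ENDIF();
 *
 * The IF macro pushes an IEMNATIVECOND entry and emits the branch to the else
 * label, IEM_MC_ELSE emits the jump to the endif label and defines the else
 * label, and IEM_MC_ENDIF reconciles the register/variable state of the two
 * branches (dropping whatever differs) before popping the entry. */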
1473
1474
1475#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
1476 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
1477 do {
1478
1479/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
1480DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1481{
1482 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1483 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1484
1485 /* Get the eflags. */
1486 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1487 kIemNativeGstRegUse_ReadOnly);
1488
1489 /* Test and jump. */
1490 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1491
1492 /* Free but don't flush the EFlags register. */
1493 iemNativeRegFreeTmp(pReNative, idxEflReg);
1494
1495 /* Make a copy of the core state now as we start the if-block. */
1496 iemNativeCondStartIfBlock(pReNative, off);
1497
1498 return off;
1499}
1500
1501
1502#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
1503 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
1504 do {
1505
1506/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
1507DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1508{
1509 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1510 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1511
1512 /* Get the eflags. */
1513 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1514 kIemNativeGstRegUse_ReadOnly);
1515
1516 /* Test and jump. */
1517 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1518
1519 /* Free but don't flush the EFlags register. */
1520 iemNativeRegFreeTmp(pReNative, idxEflReg);
1521
1522 /* Make a copy of the core state now as we start the if-block. */
1523 iemNativeCondStartIfBlock(pReNative, off);
1524
1525 return off;
1526}
1527
1528
1529#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
1530 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
1531 do {
1532
1533/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
1534DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1535{
1536 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1537 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1538
1539 /* Get the eflags. */
1540 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1541 kIemNativeGstRegUse_ReadOnly);
1542
1543 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1544 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1545
1546 /* Test and jump. */
1547 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1548
1549 /* Free but don't flush the EFlags register. */
1550 iemNativeRegFreeTmp(pReNative, idxEflReg);
1551
1552 /* Make a copy of the core state now as we start the if-block. */
1553 iemNativeCondStartIfBlock(pReNative, off);
1554
1555 return off;
1556}
1557
1558
1559#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
1560 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
1561 do {
1562
1563/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
1564DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1565{
1566 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1567 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1568
1569 /* Get the eflags. */
1570 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1571 kIemNativeGstRegUse_ReadOnly);
1572
1573 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1574 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1575
1576 /* Test and jump. */
1577 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1578
1579 /* Free but don't flush the EFlags register. */
1580 iemNativeRegFreeTmp(pReNative, idxEflReg);
1581
1582 /* Make a copy of the core state now as we start the if-block. */
1583 iemNativeCondStartIfBlock(pReNative, off);
1584
1585 return off;
1586}
1587
1588
1589#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
1590 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
1591 do {
1592
1593#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
1594 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
1595 do {
1596
1597/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
1598DECL_INLINE_THROW(uint32_t)
1599iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1600 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1601{
1602 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
1603 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1604
1605 /* Get the eflags. */
1606 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1607 kIemNativeGstRegUse_ReadOnly);
1608
1609 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1610 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1611
1612 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1613 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1614 Assert(iBitNo1 != iBitNo2);
1615
1616#ifdef RT_ARCH_AMD64
1617 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
1618
1619 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1620 if (iBitNo1 > iBitNo2)
1621 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1622 else
1623 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1624 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1625
1626#elif defined(RT_ARCH_ARM64)
1627 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1628 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1629
1630 /* and tmpreg, eflreg, #1<<iBitNo1 */
1631 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1632
1633 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1634 if (iBitNo1 > iBitNo2)
1635 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1636 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1637 else
1638 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1639 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1640
1641 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1642
1643#else
1644# error "Port me"
1645#endif
1646
1647 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1648 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1649 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1650
1651 /* Free but don't flush the EFlags and tmp registers. */
1652 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1653 iemNativeRegFreeTmp(pReNative, idxEflReg);
1654
1655 /* Make a copy of the core state now as we start the if-block. */
1656 iemNativeCondStartIfBlock(pReNative, off);
1657
1658 return off;
1659}
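/* Worked example for the isolate+shift+xor trick above (SF vs OF picked purely
 * for illustration, i.e. iBitNo1=7 and iBitNo2=11):
 *
 *      tmp  = efl & RT_BIT_32(7);      // isolate bit 1 (the AMD64 path starts with the immediate and ANDs instead)
 *      tmp <<= 11 - 7;                 // align bit 1 with bit 2
 *      tmp ^= efl;                     // bit 11 now holds (SF != OF)
 *
 * With SF=1/OF=0 (or SF=0/OF=1) bit 11 of tmp ends up set, with SF==OF it ends
 * up clear, so testing bit iBitNo2 decides between the EQ and NE variants. */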
1660
1661
1662#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
1663 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
1664 do {
1665
1666#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
1667 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
1668 do {
1669
1670/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
1671 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
1672DECL_INLINE_THROW(uint32_t)
1673iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
1674 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1675{
1676 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
1677 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1678
1679 /* We need an if-block label for the non-inverted variant. */
1680 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
1681 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
1682
1683 /* Get the eflags. */
1684 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1685 kIemNativeGstRegUse_ReadOnly);
1686
1687 /* Translate the flag masks to bit numbers. */
1688 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1689 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1690
1691 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1692 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1693 Assert(iBitNo1 != iBitNo);
1694
1695 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1696 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1697 Assert(iBitNo2 != iBitNo);
1698 Assert(iBitNo2 != iBitNo1);
1699
1700#ifdef RT_ARCH_AMD64
1701 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
1702#elif defined(RT_ARCH_ARM64)
1703 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1704#endif
1705
1706 /* Check for the lone bit first. */
1707 if (!fInverted)
1708 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1709 else
1710 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
1711
1712 /* Then extract and compare the other two bits. */
1713#ifdef RT_ARCH_AMD64
1714 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1715 if (iBitNo1 > iBitNo2)
1716 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1717 else
1718 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1719 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1720
1721#elif defined(RT_ARCH_ARM64)
1722 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1723
1724 /* and tmpreg, eflreg, #1<<iBitNo1 */
1725 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1726
1727 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1728 if (iBitNo1 > iBitNo2)
1729 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1730 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1731 else
1732 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1733 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1734
1735 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1736
1737#else
1738# error "Port me"
1739#endif
1740
1741 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1742 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1743 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1744
1745 /* Free but don't flush the EFlags and tmp registers. */
1746 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1747 iemNativeRegFreeTmp(pReNative, idxEflReg);
1748
1749 /* Make a copy of the core state now as we start the if-block. */
1750 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
1751
1752 return off;
1753}
1754
1755
1756#define IEM_MC_IF_CX_IS_NZ() \
1757 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
1758 do {
1759
1760/** Emits code for IEM_MC_IF_CX_IS_NZ. */
1761DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1762{
1763 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1764
1765 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1766 kIemNativeGstRegUse_ReadOnly);
1767 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
1768 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1769
1770 iemNativeCondStartIfBlock(pReNative, off);
1771 return off;
1772}
1773
1774
1775#define IEM_MC_IF_ECX_IS_NZ() \
1776 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
1777 do {
1778
1779#define IEM_MC_IF_RCX_IS_NZ() \
1780 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
1781 do {
1782
1783/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
1784DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1785{
1786 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1787
1788 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1789 kIemNativeGstRegUse_ReadOnly);
1790 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
1791 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1792
1793 iemNativeCondStartIfBlock(pReNative, off);
1794 return off;
1795}
1796
1797
1798#define IEM_MC_IF_CX_IS_NOT_ONE() \
1799 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
1800 do {
1801
1802/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
1803DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1804{
1805 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1806
1807 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1808 kIemNativeGstRegUse_ReadOnly);
1809#ifdef RT_ARCH_AMD64
1810 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1811#else
1812 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1813 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1814 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1815#endif
1816 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1817
1818 iemNativeCondStartIfBlock(pReNative, off);
1819 return off;
1820}
1821
1822
1823#define IEM_MC_IF_ECX_IS_NOT_ONE() \
1824 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
1825 do {
1826
1827#define IEM_MC_IF_RCX_IS_NOT_ONE() \
1828 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
1829 do {
1830
1831/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
1832DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1833{
1834 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1835
1836 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1837 kIemNativeGstRegUse_ReadOnly);
1838 if (f64Bit)
1839 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1840 else
1841 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1842 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1843
1844 iemNativeCondStartIfBlock(pReNative, off);
1845 return off;
1846}
1847
1848
1849#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1850 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
1851 do {
1852
1853#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1854 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
1855 do {
1856
1857/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
1858 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
1859DECL_INLINE_THROW(uint32_t)
1860iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
1861{
1862 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1863 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1864
1865 /* We have to load both RCX and EFLAGS before we can start branching,
1866 otherwise we'll end up in the else-block with an inconsistent
1867 register allocator state.
1868 Doing EFLAGS first as it's more likely to be loaded, right? */
1869 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1870 kIemNativeGstRegUse_ReadOnly);
1871 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1872 kIemNativeGstRegUse_ReadOnly);
1873
1874 /** @todo we could reduce this to a single branch instruction by spending a
1875 * temporary register and some setnz stuff. Not sure if loops are
1876 * worth it. */
1877 /* Check CX. */
1878#ifdef RT_ARCH_AMD64
1879 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1880#else
1881 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1882 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1883 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1884#endif
1885
1886 /* Check the EFlags bit. */
1887 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1888 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1889 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
1890 !fCheckIfSet /*fJmpIfSet*/);
1891
1892 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1893 iemNativeRegFreeTmp(pReNative, idxEflReg);
1894
1895 iemNativeCondStartIfBlock(pReNative, off);
1896 return off;
1897}
1898
1899
1900#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1901 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
1902 do {
1903
1904#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1905 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
1906 do {
1907
1908#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1909 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
1910 do {
1911
1912#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1913 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
1914 do {
1915
1916/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
1917 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
1918 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
1919 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
1920DECL_INLINE_THROW(uint32_t)
1921iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1922 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
1923{
1924 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1925 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1926
1927 /* We have to load both RCX and EFLAGS before we can start branching,
1928 otherwise we'll end up in the else-block with an inconsistent
1929 register allocator state.
1930 Doing EFLAGS first as it's more likely to be loaded, right? */
1931 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1932 kIemNativeGstRegUse_ReadOnly);
1933 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1934 kIemNativeGstRegUse_ReadOnly);
1935
1936 /** @todo we could reduce this to a single branch instruction by spending a
1937 * temporary register and some setnz stuff. Not sure if loops are
1938 * worth it. */
1939 /* Check RCX/ECX. */
1940 if (f64Bit)
1941 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1942 else
1943 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1944
1945 /* Check the EFlags bit. */
1946 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1947 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1948 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
1949 !fCheckIfSet /*fJmpIfSet*/);
1950
1951 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1952 iemNativeRegFreeTmp(pReNative, idxEflReg);
1953
1954 iemNativeCondStartIfBlock(pReNative, off);
1955 return off;
1956}
1957
1958
1959#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
1960 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
1961 do {
1962
1963/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
1964DECL_INLINE_THROW(uint32_t)
1965iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
1966{
1967 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1968
1969 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
1970 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
1971 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
1972 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
1973
1974 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
1975
1976 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
1977
1978 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
1979
1980 iemNativeCondStartIfBlock(pReNative, off);
1981 return off;
1982}
1983
1984
1985#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
1986 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
1987 do {
1988
1989/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
1990DECL_INLINE_THROW(uint32_t)
1991iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
1992{
1993 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1994 Assert(iGReg < 16);
1995
1996 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
1997 kIemNativeGstRegUse_ReadOnly);
1998
1999 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
2000
2001 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2002
2003 iemNativeCondStartIfBlock(pReNative, off);
2004 return off;
2005}
2006
2007
2008#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2009
2010#define IEM_MC_IF_MXCSR_XCPT_PENDING() \
2011 off = iemNativeEmitIfMxcsrXcptPending(pReNative, off); \
2012 do {
2013
2014/** Emits code for IEM_MC_IF_MXCSR_XCPT_PENDING. */
2015DECL_INLINE_THROW(uint32_t)
2016iemNativeEmitIfMxcsrXcptPending(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2017{
2018 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2019
2020 uint8_t const idxGstMxcsrReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
2021 kIemNativeGstRegUse_Calculation);
2022 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
2023
2024 /* mov tmp0, mxcsr */
2025 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp, idxGstMxcsrReg);
2026 /* tmp0 &= X86_MXCSR_XCPT_FLAGS */
2027 off = iemNativeEmitAndGprByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS);
2028 /* mxcsr &= X86_MXCSR_XCPT_MASK */
2029 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstMxcsrReg, X86_MXCSR_XCPT_MASK);
2030 /* mxcsr = ~mxcsr */
2031 off = iemNativeEmitInvBitsGpr(pReNative, off, idxGstMxcsrReg, idxGstMxcsrReg);
2032 /* mxcsr >>= X86_MXCSR_XCPT_MASK_SHIFT */
2033 off = iemNativeEmitShiftGprRight(pReNative, off, idxGstMxcsrReg, X86_MXCSR_XCPT_MASK_SHIFT);
2034 /* tmp0 &= mxcsr */
2035 off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegTmp, idxGstMxcsrReg);
2036
2037 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxRegTmp, true /*f64Bit*/, pEntry->idxLabelElse);
2038 iemNativeRegFreeTmp(pReNative, idxGstMxcsrReg);
2039 iemNativeRegFreeTmp(pReNative, idxRegTmp);
2040
2041 iemNativeCondStartIfBlock(pReNative, off);
2042 return off;
2043}
2044
2045#endif
2046
2047
2048/*********************************************************************************************************************************
2049* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
2050*********************************************************************************************************************************/
2051
2052#define IEM_MC_NOREF(a_Name) \
2053 RT_NOREF_PV(a_Name)
2054
2055#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
2056 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
2057
2058#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
2059 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
2060
2061#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
2062 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
2063
2064#define IEM_MC_LOCAL(a_Type, a_Name) \
2065 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
2066
2067#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
2068 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
2069
2070#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
2071 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
2072
2073
2074/**
2075 * Sets the host register for @a idxVarRc to @a idxReg.
2076 *
2077 * The register must not be allocated. Any guest register shadowing will be
2078 * implicitly dropped by this call.
2079 *
2080 * The variable must not have any register associated with it (causes
2081 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
2082 * implied.
2083 *
2084 * @returns idxReg
2085 * @param pReNative The recompiler state.
2086 * @param idxVar The variable.
2087 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
2088 * @param off For recording in debug info.
2089 *
2090 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
2091 */
2092DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
2093{
2094 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2095 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
2096 Assert(!pVar->fRegAcquired);
2097 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2098 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
2099 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
2100
2101 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
2102 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
2103
2104 iemNativeVarSetKindToStack(pReNative, idxVar);
2105 pVar->idxReg = idxReg;
2106
2107 return idxReg;
2108}
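/* Typical use (sketch): attach the call-return register to a result variable
 * without copying, as iemNativeEmitCallAImplCommon() further down does:
 *
 *      off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
 *      iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
 */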
2109
2110
2111/**
2112 * A convenient helper function.
2113 */
2114DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
2115 uint8_t idxReg, uint32_t *poff)
2116{
2117 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
2118 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
2119 return idxReg;
2120}
2121
2122
2123/**
2124 * This is called by IEM_MC_END() to clean up all variables.
2125 */
2126DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
2127{
2128 uint32_t const bmVars = pReNative->Core.bmVars;
2129 if (bmVars != 0)
2130 iemNativeVarFreeAllSlow(pReNative, bmVars);
2131 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
2132 Assert(pReNative->Core.bmStack == 0);
2133}
2134
2135
2136#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
2137
2138/**
2139 * This is called by IEM_MC_FREE_LOCAL.
2140 */
2141DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2142{
2143 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2144 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
2145 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2146}
2147
2148
2149#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
2150
2151/**
2152 * This is called by IEM_MC_FREE_ARG.
2153 */
2154DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2155{
2156 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2157 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
2158 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2159}
2160
2161
2162#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
2163
2164/**
2165 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
2166 */
2167DECL_INLINE_THROW(uint32_t)
2168iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
2169{
2170 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
2171 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
2172 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2173 Assert( pVarDst->cbVar == sizeof(uint16_t)
2174 || pVarDst->cbVar == sizeof(uint32_t));
2175
2176 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
2177 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
2178 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
2179 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
2180 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2181
2182 Assert(pVarDst->cbVar < pVarSrc->cbVar);
2183
2184 /*
2185 * Special case for immediates.
2186 */
2187 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
2188 {
2189 switch (pVarDst->cbVar)
2190 {
2191 case sizeof(uint16_t):
2192 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
2193 break;
2194 case sizeof(uint32_t):
2195 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
2196 break;
2197 default: AssertFailed(); break;
2198 }
2199 }
2200 else
2201 {
2202 /*
2203 * The generic solution for now.
2204 */
2205 /** @todo optimize this by having the python script make sure the source
2206 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
2207 * statement. Then we could just transfer the register assignments. */
2208 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
2209 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
2210 switch (pVarDst->cbVar)
2211 {
2212 case sizeof(uint16_t):
2213 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
2214 break;
2215 case sizeof(uint32_t):
2216 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
2217 break;
2218 default: AssertFailed(); break;
2219 }
2220 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
2221 iemNativeVarRegisterRelease(pReNative, idxVarDst);
2222 }
2223 return off;
2224}
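/* Illustrative microcode usage (hypothetical fragment): the destination must be
 * a freshly declared 16-bit or 32-bit variable and the source a wider stack
 * variable or immediate, as asserted above:
 *
 *      IEM_MC_LOCAL(uint64_t, u64Value);
 *      ... // something that produces u64Value
 *      IEM_MC_LOCAL(uint32_t, u32Value);
 *      IEM_MC_ASSIGN_TO_SMALLER(u32Value, u64Value);   // u32Value = (uint32_t)u64Value
 */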
2225
2226
2227
2228/*********************************************************************************************************************************
2229* Emitters for IEM_MC_CALL_CIMPL_XXX *
2230*********************************************************************************************************************************/
2231
2232/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
2233DECL_INLINE_THROW(uint32_t)
2234iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
2235 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
2236
2237{
2238 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
2239
2240#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2241 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
2242 when a call clobbers any of the relevant control registers. */
2243# if 1
2244 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
2245 {
2246 /* Likely as long as call+ret are done via cimpl. */
2247 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
2248 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
2249 }
2250 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
2251 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2252 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
2253 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2254 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
2255 else
2256 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2257 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2258 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2259
2260# else
2261 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
2262 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2263 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
2264 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2265 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
2266 || pfnCImpl == (uintptr_t)iemCImpl_callf
2267 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
2268 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
2269 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2270 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2271 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2272# endif
2273#endif
2274
2275 /*
2276 * Do all the call setup and cleanup.
2277 */
2278 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
2279
2280 /*
2281 * Load the two or three hidden arguments.
2282 */
2283#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2284 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
2285 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2286 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
2287#else
2288 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2289 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
2290#endif
2291
2292 /*
2293 * Make the call and check the return code.
2294 *
2295 * Shadow PC copies are always flushed here, other stuff depends on flags.
2296 * Segment and general purpose registers are explicitly flushed via the
2297 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
2298 * macros.
2299 */
2300 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
2301#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2302 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
2303#endif
2304 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
2305 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
2306 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
2307 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
2308
2309 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
2310}
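/* Illustrative sketch (comment only): ignoring the Windows/VBOXSTRICTRC special
 * case, the code emitted above boils down to a call of the form
 *
 *      rcStrict = pfnCImpl(pVCpu, cbInstr, a0, a1, ...);
 *
 * where pVCpu and cbInstr are the hidden arguments loaded into the first call
 * registers, the a0..aN come from the argument variables set up by
 * iemNativeEmitCallCommon(), and the result is checked by
 * iemNativeEmitCheckCallRetAndPassUp(). */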
2311
2312
2313#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
2314 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
2315
2316/** Emits code for IEM_MC_CALL_CIMPL_1. */
2317DECL_INLINE_THROW(uint32_t)
2318iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2319 uintptr_t pfnCImpl, uint8_t idxArg0)
2320{
2321 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2322 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
2323}
2324
2325
2326#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
2327 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
2328
2329/** Emits code for IEM_MC_CALL_CIMPL_2. */
2330DECL_INLINE_THROW(uint32_t)
2331iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2332 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
2333{
2334 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2335 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2336 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
2337}
2338
2339
2340#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
2341 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2342 (uintptr_t)a_pfnCImpl, a0, a1, a2)
2343
2344/** Emits code for IEM_MC_CALL_CIMPL_3. */
2345DECL_INLINE_THROW(uint32_t)
2346iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2347 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2348{
2349 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2350 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2351 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2352 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
2353}
2354
2355
2356#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
2357 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2358 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
2359
2360/** Emits code for IEM_MC_CALL_CIMPL_4. */
2361DECL_INLINE_THROW(uint32_t)
2362iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2363 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2364{
2365 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2366 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2367 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2368 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2369 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
2370}
2371
2372
2373#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
2374 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2375 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
2376
2377/** Emits code for IEM_MC_CALL_CIMPL_5. */
2378DECL_INLINE_THROW(uint32_t)
2379iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2380 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
2381{
2382 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2383 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2384 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2385 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2386 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
2387 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
2388}
2389
2390
2391/** Recompiler debugging: Flush guest register shadow copies. */
2392#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
2393
2394
2395
2396/*********************************************************************************************************************************
2397* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
2398*********************************************************************************************************************************/
2399
2400/**
2401 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
2402 */
2403DECL_INLINE_THROW(uint32_t)
2404iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2405 uintptr_t pfnAImpl, uint8_t cArgs)
2406{
2407 if (idxVarRc != UINT8_MAX)
2408 {
2409 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
2410 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
2411 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
2412 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
2413 }
2414
2415 /*
2416 * Do all the call setup and cleanup.
2417 *
2418 * It is only required to flush pending guest register writes in call volatile registers as
2419 * assembly helpers can't throw and don't access anything living in CPUMCTX, they only
2420 * access parameters. The flushing of call volatile registers is always done in iemNativeEmitCallCommon()
2421 * no matter the fFlushPendingWrites parameter.
2422 */
2423 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
2424
2425 /*
2426 * Make the call and update the return code variable if we've got one.
2427 */
2428 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
2429 if (idxVarRc != UINT8_MAX)
2430 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
2431
2432 return off;
2433}
2434
2435
2436
2437#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
2438 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
2439
2440#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
2441 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
2442
2443/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
2444DECL_INLINE_THROW(uint32_t)
2445iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
2446{
2447 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
2448}
2449
2450
2451#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
2452 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
2453
2454#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
2455 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
2456
2457/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
2458DECL_INLINE_THROW(uint32_t)
2459iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
2460{
2461 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2462 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
2463}
2464
2465
2466#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
2467 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
2468
2469#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
2470 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
2471
2472/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
2473DECL_INLINE_THROW(uint32_t)
2474iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2475 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
2476{
2477 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2478 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2479 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
2480}
2481
2482
2483#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
2484 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
2485
2486#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
2487 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
2488
2489/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
2490DECL_INLINE_THROW(uint32_t)
2491iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2492 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2493{
2494 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2495 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2496 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2497 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
2498}
2499
2500
2501#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
2502 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2503
2504#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
2505 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2506
2507/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
2508DECL_INLINE_THROW(uint32_t)
2509iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2510 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2511{
2512 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2513 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2514 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2515 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
2516 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
2517}
2518
2519
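/*
 * Illustrative walk-through (not compiled): a microcode statement like
 *      IEM_MC_CALL_AIMPL_3(rcStrict, pfn, a0, a1, a2)
 * expands (per the macro above) to
 *      off = iemNativeEmitCallAImpl3(pReNative, off, rcStrict, (uintptr_t)(pfn), a0, a1, a2);
 * which asserts that a0..a2 are the argument variables for slots 0..2 and defers to
 * iemNativeEmitCallAImplCommon() with cArgs=3.  The common worker sets up the call via
 * iemNativeEmitCallCommon(), emits it with iemNativeEmitCallImm() and, since idxVarRc
 * isn't UINT8_MAX, binds the rcStrict variable to IEMNATIVE_CALL_RET_GREG.  The names
 * rcStrict, pfn and a0..a2 are hypothetical placeholders.
 */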
2520
2521/*********************************************************************************************************************************
2522* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
2523*********************************************************************************************************************************/
2524
2525#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
2526 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
2527
2528#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2529 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
2530
2531#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2532 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
2533
2534#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2535 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
2536
2537
2538/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
2539 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
2540DECL_INLINE_THROW(uint32_t)
2541iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
2542{
2543 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2544 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2545 Assert(iGRegEx < 20);
2546
2547 /* Same discussion as in iemNativeEmitFetchGregU16 */
2548 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2549 kIemNativeGstRegUse_ReadOnly);
2550
2551 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2552 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2553
2554 /* The value is zero-extended to the full 64-bit host register width. */
2555 if (iGRegEx < 16)
2556 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2557 else
2558 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2559
2560 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2561 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2562 return off;
2563}
2564
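/*
 * Worked example (illustrative): with iGRegEx = 16 (the high byte of GPR 0, i.e. AH)
 * and guest RAX = 0xf00d1234, the iGRegEx >= 16 path above emits a Gpr8Hi load, so the
 * destination variable ends up holding 0x12, zero extended to the full host register.
 */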
2565
2566#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2567 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
2568
2569#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2570 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
2571
2572#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2573 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
2574
2575/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
2576DECL_INLINE_THROW(uint32_t)
2577iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
2578{
2579 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2580 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2581 Assert(iGRegEx < 20);
2582
2583 /* Same discussion as in iemNativeEmitFetchGregU16 */
2584 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2585 kIemNativeGstRegUse_ReadOnly);
2586
2587 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2588 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2589
2590 if (iGRegEx < 16)
2591 {
2592 switch (cbSignExtended)
2593 {
2594 case sizeof(uint16_t):
2595 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2596 break;
2597 case sizeof(uint32_t):
2598 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2599 break;
2600 case sizeof(uint64_t):
2601 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2602 break;
2603 default: AssertFailed(); break;
2604 }
2605 }
2606 else
2607 {
2608 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2609 switch (cbSignExtended)
2610 {
2611 case sizeof(uint16_t):
2612 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2613 break;
2614 case sizeof(uint32_t):
2615 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2616 break;
2617 case sizeof(uint64_t):
2618 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2619 break;
2620 default: AssertFailed(); break;
2621 }
2622 }
2623
2624 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2625 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2626 return off;
2627}
2628
2629
2630
2631#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
2632 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
2633
2634#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
2635 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2636
2637#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
2638 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2639
2640/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
2641DECL_INLINE_THROW(uint32_t)
2642iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2643{
2644 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2645 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2646 Assert(iGReg < 16);
2647
2648 /*
2649 * We can either just load the low 16 bits of the GPR into a host register
2650 * for the variable, or we can do so via a shadow copy host register. The
2651 * latter will avoid having to reload it if it's being stored later, but
2652 * will waste a host register if it isn't touched again. Since we don't
2653 * know what's going to happen, we choose the latter for now.
2654 */
2655 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2656 kIemNativeGstRegUse_ReadOnly);
2657
2658 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2659 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2660 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2661 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2662
2663 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2664 return off;
2665}
2666
2667
2668#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
2669 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2670
2671#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
2672 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2673
2674/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
2675DECL_INLINE_THROW(uint32_t)
2676iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
2677{
2678 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2679 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2680 Assert(iGReg < 16);
2681
2682 /*
2683 * We can either just load the low 16 bits of the GPR into a host register
2684 * for the variable, or we can do so via a shadow copy host register. The
2685 * latter will avoid having to reload it if it's being stored later, but
2686 * will waste a host register if it isn't touched again. Since we don't
2687 * know what's going to happen, we choose the latter for now.
2688 */
2689 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2690 kIemNativeGstRegUse_ReadOnly);
2691
2692 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2693 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2694 if (cbSignExtended == sizeof(uint32_t))
2695 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2696 else
2697 {
2698 Assert(cbSignExtended == sizeof(uint64_t));
2699 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2700 }
2701 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2702
2703 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2704 return off;
2705}
2706
2707
2708#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
2709 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
2710
2711#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
2712 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
2713
2714/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
2715DECL_INLINE_THROW(uint32_t)
2716iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2717{
2718 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2719 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2720 Assert(iGReg < 16);
2721
2722 /*
2723 * We can either just load the low 32 bits of the GPR into a host register
2724 * for the variable, or we can do so via a shadow copy host register. The
2725 * latter will avoid having to reload it if it's being stored later, but
2726 * will waste a host register if it isn't touched again. Since we don't
2727 * know what's going to happen, we choose the latter for now.
2728 */
2729 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2730 kIemNativeGstRegUse_ReadOnly);
2731
2732 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2733 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2734 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2735 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2736
2737 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2738 return off;
2739}
2740
2741
2742#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
2743 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
2744
2745/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
2746DECL_INLINE_THROW(uint32_t)
2747iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2748{
2749 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2750 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2751 Assert(iGReg < 16);
2752
2753 /*
2754 * We can either just load the low 32 bits of the GPR into a host register
2755 * for the variable, or we can do so via a shadow copy host register. The
2756 * latter will avoid having to reload it if it's being stored later, but
2757 * will waste a host register if it isn't touched again. Since we don't
2758 * know what's going to happen, we choose the latter for now.
2759 */
2760 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2761 kIemNativeGstRegUse_ReadOnly);
2762
2763 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2764 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2765 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2766 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2767
2768 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2769 return off;
2770}
2771
2772
2773#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
2774 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2775
2776#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
2777 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2778
2779/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
2780 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
2781DECL_INLINE_THROW(uint32_t)
2782iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2783{
2784 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2785 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2786 Assert(iGReg < 16);
2787
2788 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2789 kIemNativeGstRegUse_ReadOnly);
2790
2791 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2792 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2793 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
2794 /** @todo name the register a shadow one already? */
2795 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2796
2797 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2798 return off;
2799}
2800
2801
2802
2803/*********************************************************************************************************************************
2804* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
2805*********************************************************************************************************************************/
2806
2807#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
2808 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
2809
2810/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
2811DECL_INLINE_THROW(uint32_t)
2812iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
2813{
2814 Assert(iGRegEx < 20);
2815 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2816 kIemNativeGstRegUse_ForUpdate);
2817#ifdef RT_ARCH_AMD64
2818 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2819
2820 /* To the lowest byte of the register: mov r8, imm8 */
2821 if (iGRegEx < 16)
2822 {
2823 if (idxGstTmpReg >= 8)
2824 pbCodeBuf[off++] = X86_OP_REX_B;
2825 else if (idxGstTmpReg >= 4)
2826 pbCodeBuf[off++] = X86_OP_REX;
2827 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2828 pbCodeBuf[off++] = u8Value;
2829 }
2830 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
2831 else if (idxGstTmpReg < 4)
2832 {
2833 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
2834 pbCodeBuf[off++] = u8Value;
2835 }
2836 else
2837 {
2838 /* ror reg64, 8 */
2839 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2840 pbCodeBuf[off++] = 0xc1;
2841 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
2842 pbCodeBuf[off++] = 8;
2843
2844 /* mov reg8, imm8 */
2845 if (idxGstTmpReg >= 8)
2846 pbCodeBuf[off++] = X86_OP_REX_B;
2847 else if (idxGstTmpReg >= 4)
2848 pbCodeBuf[off++] = X86_OP_REX;
2849 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2850 pbCodeBuf[off++] = u8Value;
2851
2852 /* rol reg64, 8 */
2853 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2854 pbCodeBuf[off++] = 0xc1;
2855 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
2856 pbCodeBuf[off++] = 8;
2857 }
2858
2859#elif defined(RT_ARCH_ARM64)
2860 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
2861 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2862 if (iGRegEx < 16)
2863 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
2864 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
2865 else
2866 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
2867 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
2868 iemNativeRegFreeTmp(pReNative, idxImmReg);
2869
2870#else
2871# error "Port me!"
2872#endif
2873
2874 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2875
2876#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
2877 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
2878#endif
2879
2880 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2881 return off;
2882}
2883
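/*
 * Encoding example (illustrative): if the guest register happens to be shadowed in host
 * register r10 (idxGstTmpReg = 10) and iGRegEx selects a high byte register (>= 16), the
 * rotate path above emits for u8Value = 0x42:
 *      49 c1 ca 08     ror r10, 8
 *      41 b2 42        mov r10b, 0x42
 *      49 c1 c2 08     rol r10, 8
 * leaving 0x42 in bits 15:8 of the shadow register and all other bits unchanged.
 */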
2884
2885#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
2886 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
2887
2888/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
2889DECL_INLINE_THROW(uint32_t)
2890iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
2891{
2892 Assert(iGRegEx < 20);
2893 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
2894
2895 /*
2896 * If it's a constant value (unlikely) we treat this as an
2897 * IEM_MC_STORE_GREG_U8_CONST statement.
2898 */
2899 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
2900 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
2901 { /* likely */ }
2902 else
2903 {
2904 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
2905 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2906 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
2907 }
2908
2909 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2910 kIemNativeGstRegUse_ForUpdate);
2911 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
2912
2913#ifdef RT_ARCH_AMD64
2914 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
2915 if (iGRegEx < 16)
2916 {
2917 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
2918 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
2919 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
2920 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
2921 pbCodeBuf[off++] = X86_OP_REX;
2922 pbCodeBuf[off++] = 0x8a;
2923 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
2924 }
2925 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
2926 else if (idxGstTmpReg < 4 && idxVarReg < 4)
2927 {
2928 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
2929 pbCodeBuf[off++] = 0x8a;
2930 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
2931 }
2932 else
2933 {
2934 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
2935
2936 /* ror reg64, 8 */
2937 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2938 pbCodeBuf[off++] = 0xc1;
2939 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
2940 pbCodeBuf[off++] = 8;
2941
2942 /* mov reg8, reg8(r/m) */
2943 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
2944 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
2945 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
2946 pbCodeBuf[off++] = X86_OP_REX;
2947 pbCodeBuf[off++] = 0x8a;
2948 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
2949
2950 /* rol reg64, 8 */
2951 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2952 pbCodeBuf[off++] = 0xc1;
2953 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
2954 pbCodeBuf[off++] = 8;
2955 }
2956
2957#elif defined(RT_ARCH_ARM64)
2958 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
2959 or
2960 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
2961 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2962 if (iGRegEx < 16)
2963 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
2964 else
2965 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
2966
2967#else
2968# error "Port me!"
2969#endif
2970 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2971
2972 iemNativeVarRegisterRelease(pReNative, idxValueVar);
2973
2974#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
2975 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
2976#endif
2977 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2978 return off;
2979}
2980
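/*
 * Worked example (illustrative) for the ARM64 path: bfi inserts the low bits of the
 * source into the destination and leaves the remaining destination bits untouched, so
 * with the guest register holding 0x11223344 and the value register holding 0xab:
 *      bfi <gst>, <val>, #0, #8  ->  0x112233ab   (low byte store)
 *      bfi <gst>, <val>, #8, #8  ->  0x1122ab44   (ah/ch/dh/bh store)
 */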
2981
2982
2983#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
2984 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
2985
2986/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
2987DECL_INLINE_THROW(uint32_t)
2988iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
2989{
2990 Assert(iGReg < 16);
2991 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2992 kIemNativeGstRegUse_ForUpdate);
2993#ifdef RT_ARCH_AMD64
2994 /* mov reg16, imm16 */
2995 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
2996 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2997 if (idxGstTmpReg >= 8)
2998 pbCodeBuf[off++] = X86_OP_REX_B;
2999 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
3000 pbCodeBuf[off++] = RT_BYTE1(uValue);
3001 pbCodeBuf[off++] = RT_BYTE2(uValue);
3002
3003#elif defined(RT_ARCH_ARM64)
3004 /* movk xdst, #uValue, lsl #0 */
3005 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3006 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
3007
3008#else
3009# error "Port me!"
3010#endif
3011
3012 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3013
3014#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3015 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3016#endif
3017 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3018 return off;
3019}
3020
3021
3022#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
3023 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
3024
3025/** Emits code for IEM_MC_STORE_GREG_U16. */
3026DECL_INLINE_THROW(uint32_t)
3027iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3028{
3029 Assert(iGReg < 16);
3030 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3031
3032 /*
3033 * If it's a constant value (unlikely) we treat this as an
3034 * IEM_MC_STORE_GREG_U16_CONST statement.
3035 */
3036 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3037 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3038 { /* likely */ }
3039 else
3040 {
3041 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3042 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3043 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
3044 }
3045
3046 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3047 kIemNativeGstRegUse_ForUpdate);
3048
3049#ifdef RT_ARCH_AMD64
3050 /* mov reg16, reg16 or [mem16] */
3051 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
3052 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3053 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
3054 {
3055 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
3056 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
3057 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
3058 pbCodeBuf[off++] = 0x8b;
3059 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
3060 }
3061 else
3062 {
3063 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
3064 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
3065 if (idxGstTmpReg >= 8)
3066 pbCodeBuf[off++] = X86_OP_REX_R;
3067 pbCodeBuf[off++] = 0x8b;
3068 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
3069 }
3070
3071#elif defined(RT_ARCH_ARM64)
3072 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
3073 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
3074 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3075 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
3076 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3077
3078#else
3079# error "Port me!"
3080#endif
3081
3082 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3083
3084#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3085 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3086#endif
3087 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3088 return off;
3089}
3090
3091
3092#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
3093 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
3094
3095/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
3096DECL_INLINE_THROW(uint32_t)
3097iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
3098{
3099 Assert(iGReg < 16);
3100 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3101 kIemNativeGstRegUse_ForFullWrite);
3102 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
3103#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3104 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3105#endif
3106 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3107 return off;
3108}
3109
3110
3111#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
3112 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
3113
3114/** Emits code for IEM_MC_STORE_GREG_U32. */
3115DECL_INLINE_THROW(uint32_t)
3116iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3117{
3118 Assert(iGReg < 16);
3119 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3120
3121 /*
3122 * If it's a constant value (unlikely) we treat this as an
3123 * IEM_MC_STORE_GREG_U32_CONST statement.
3124 */
3125 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3126 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3127 { /* likely */ }
3128 else
3129 {
3130 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3131 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3132 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
3133 }
3134
3135 /*
3136 * For the rest we allocate a guest register for the variable and write
3137 * it to the CPUMCTX structure.
3138 */
3139 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3140#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3141 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3142#else
3143 RT_NOREF(idxVarReg);
3144#endif
3145#ifdef VBOX_STRICT
3146 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
3147#endif
3148 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3149 return off;
3150}
3151
3152
3153#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
3154 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
3155
3156/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
3157DECL_INLINE_THROW(uint32_t)
3158iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
3159{
3160 Assert(iGReg < 16);
3161 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3162 kIemNativeGstRegUse_ForFullWrite);
3163 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
3164#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3165 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3166#endif
3167 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3168 return off;
3169}
3170
3171
3172#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
3173 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
3174
3175#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
3176 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
3177
3178/** Emits code for IEM_MC_STORE_GREG_U64. */
3179DECL_INLINE_THROW(uint32_t)
3180iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3181{
3182 Assert(iGReg < 16);
3183 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3184
3185 /*
3186 * If it's a constant value (unlikely) we treat this as an
3187 * IEM_MC_STORE_GREG_U64_CONST statement.
3188 */
3189 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3190 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3191 { /* likely */ }
3192 else
3193 {
3194 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3195 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3196 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
3197 }
3198
3199 /*
3200 * For the rest we allocate a guest register for the variable and write
3201 * it to the CPUMCTX structure.
3202 */
3203 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3204#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3205 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3206#else
3207 RT_NOREF(idxVarReg);
3208#endif
3209 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3210 return off;
3211}
3212
3213
3214#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
3215 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
3216
3217/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
3218DECL_INLINE_THROW(uint32_t)
3219iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
3220{
3221 Assert(iGReg < 16);
3222 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3223 kIemNativeGstRegUse_ForUpdate);
3224 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
3225#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3226 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3227#endif
3228 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3229 return off;
3230}
3231
3232
3233/*********************************************************************************************************************************
3234* General purpose register manipulation (add, sub). *
3235*********************************************************************************************************************************/
3236
3237#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8AddendConst) \
3238 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8AddendConst)
3239
3240/** Emits code for IEM_MC_ADD_GREG_U16. */
3241DECL_INLINE_THROW(uint32_t)
3242iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
3243{
3244 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3245 kIemNativeGstRegUse_ForUpdate);
3246
3247#ifdef RT_ARCH_AMD64
3248 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3249 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3250 if (idxGstTmpReg >= 8)
3251 pbCodeBuf[off++] = X86_OP_REX_B;
3252 if (uAddend == 1)
3253 {
3254 pbCodeBuf[off++] = 0xff; /* inc */
3255 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3256 }
3257 else
3258 {
3259 pbCodeBuf[off++] = 0x81;
3260 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3261 pbCodeBuf[off++] = uAddend;
3262 pbCodeBuf[off++] = 0;
3263 }
3264
3265#else
3266 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3267 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3268
3269 /* add tmp, gstgrp, uAddend */
3270 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
3271
3272 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
3273 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3274
3275 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3276#endif
3277
3278 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3279
3280#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3281 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3282#endif
3283
3284 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3285 return off;
3286}
3287
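/*
 * Worked example (illustrative): IEM_MC_ADD_GREG_U16 must wrap at 16 bits without
 * touching bits 63:16.  With the guest register holding 0x0001ffff and an addend of 1,
 * the ARM64 path computes tmp = 0x00020000 and the bfi writes only bits 15:0 back,
 * giving 0x00010000; the AMD64 path achieves the same with the 66h-prefixed inc/add,
 * which only writes the low word.
 */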
3288
3289#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
3290 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3291
3292#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
3293 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3294
3295/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
3296DECL_INLINE_THROW(uint32_t)
3297iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
3298{
3299 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3300 kIemNativeGstRegUse_ForUpdate);
3301
3302#ifdef RT_ARCH_AMD64
3303 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3304 if (f64Bit)
3305 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3306 else if (idxGstTmpReg >= 8)
3307 pbCodeBuf[off++] = X86_OP_REX_B;
3308 if (uAddend == 1)
3309 {
3310 pbCodeBuf[off++] = 0xff; /* inc */
3311 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3312 }
3313 else if (uAddend < 128)
3314 {
3315 pbCodeBuf[off++] = 0x83; /* add */
3316 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3317 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3318 }
3319 else
3320 {
3321 pbCodeBuf[off++] = 0x81; /* add */
3322 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3323 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3324 pbCodeBuf[off++] = 0;
3325 pbCodeBuf[off++] = 0;
3326 pbCodeBuf[off++] = 0;
3327 }
3328
3329#else
3330 /* add gstgrp, gstgrp, uAddend */
3331 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3332 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
3333
3334#endif
3335
3336 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3337
3338#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3339 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3340#endif
3341
3342 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3343 return off;
3344}
3345
3346
3347
3348#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
3349 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
3350
3351/** Emits code for IEM_MC_SUB_GREG_U16. */
3352DECL_INLINE_THROW(uint32_t)
3353iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
3354{
3355 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3356 kIemNativeGstRegUse_ForUpdate);
3357
3358#ifdef RT_ARCH_AMD64
3359 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3360 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3361 if (idxGstTmpReg >= 8)
3362 pbCodeBuf[off++] = X86_OP_REX_B;
3363 if (uSubtrahend == 1)
3364 {
3365 pbCodeBuf[off++] = 0xff; /* dec */
3366 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3367 }
3368 else
3369 {
3370 pbCodeBuf[off++] = 0x81;
3371 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3372 pbCodeBuf[off++] = uSubtrahend;
3373 pbCodeBuf[off++] = 0;
3374 }
3375
3376#else
3377 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3378 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3379
3380 /* sub tmp, gstgrp, uSubtrahend */
3381 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
3382
3383 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
3384 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3385
3386 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3387#endif
3388
3389 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3390
3391#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3392 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3393#endif
3394
3395 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3396 return off;
3397}
3398
3399
3400#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
3401 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3402
3403#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
3404 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3405
3406/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
3407DECL_INLINE_THROW(uint32_t)
3408iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
3409{
3410 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3411 kIemNativeGstRegUse_ForUpdate);
3412
3413#ifdef RT_ARCH_AMD64
3414 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3415 if (f64Bit)
3416 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3417 else if (idxGstTmpReg >= 8)
3418 pbCodeBuf[off++] = X86_OP_REX_B;
3419 if (uSubtrahend == 1)
3420 {
3421 pbCodeBuf[off++] = 0xff; /* dec */
3422 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3423 }
3424 else if (uSubtrahend < 128)
3425 {
3426 pbCodeBuf[off++] = 0x83; /* sub */
3427 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3428 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3429 }
3430 else
3431 {
3432 pbCodeBuf[off++] = 0x81; /* sub */
3433 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3434 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3435 pbCodeBuf[off++] = 0;
3436 pbCodeBuf[off++] = 0;
3437 pbCodeBuf[off++] = 0;
3438 }
3439
3440#else
3441 /* sub tmp, gstgrp, uSubtrahend */
3442 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3443 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
3444
3445#endif
3446
3447 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3448
3449#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3450 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3451#endif
3452
3453 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3454 return off;
3455}
3456
3457
3458#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
3459 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
3460
3461#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
3462 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
3463
3464#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
3465 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
3466
3467#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
3468 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
3469
3470/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
3471DECL_INLINE_THROW(uint32_t)
3472iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
3473{
3474#ifdef VBOX_STRICT
3475 switch (cbMask)
3476 {
3477 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3478 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3479 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3480 case sizeof(uint64_t): break;
3481 default: AssertFailedBreak();
3482 }
3483#endif
3484
3485 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3486 kIemNativeGstRegUse_ForUpdate);
3487
3488 switch (cbMask)
3489 {
3490 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
3491 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
3492 break;
3493 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
3494 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
3495 break;
3496 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
3497 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
3498 break;
3499 case sizeof(uint64_t):
3500 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
3501 break;
3502 default: AssertFailedBreak();
3503 }
3504
3505 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3506
3507#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3508 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3509#endif
3510
3511 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3512 return off;
3513}
3514
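/*
 * Worked example (illustrative): IEM_MC_AND_GREG_U8 with mask 0x0f is widened to
 * 0xffffffffffffff0f above, so AND'ing a guest register holding 0x1122334455667788
 * yields 0x1122334455667708 - only the low byte is modified.  The 32-bit case instead
 * uses the 32-bit AND emitter on purpose, since a 32-bit operation zeroes bits 63:32
 * exactly like the guest instruction would.
 */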
3515
3516#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
3517 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
3518
3519#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
3520 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
3521
3522#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
3523 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
3524
3525#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
3526 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
3527
3528/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
3529DECL_INLINE_THROW(uint32_t)
3530iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
3531{
3532#ifdef VBOX_STRICT
3533 switch (cbMask)
3534 {
3535 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3536 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3537 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3538 case sizeof(uint64_t): break;
3539 default: AssertFailedBreak();
3540 }
3541#endif
3542
3543 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3544 kIemNativeGstRegUse_ForUpdate);
3545
3546 switch (cbMask)
3547 {
3548 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
3549 case sizeof(uint16_t):
3550 case sizeof(uint64_t):
3551 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
3552 break;
3553 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
3554 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
3555 break;
3556 default: AssertFailedBreak();
3557 }
3558
3559 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3560
3561#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3562 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3563#endif
3564
3565 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3566 return off;
3567}
3568
3569
3570/*********************************************************************************************************************************
3571* Local/Argument variable manipulation (add, sub, and, or). *
3572*********************************************************************************************************************************/
3573
3574#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
3575 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3576
3577#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
3578 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3579
3580#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
3581 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3582
3583#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
3584 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3585
3586
3587#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
3588 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
3589
3590#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
3591 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
3592
3593#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
3594 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
3595
3596/** Emits code for AND'ing a local and a constant value. */
3597DECL_INLINE_THROW(uint32_t)
3598iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3599{
3600#ifdef VBOX_STRICT
3601 switch (cbMask)
3602 {
3603 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3604 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3605 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3606 case sizeof(uint64_t): break;
3607 default: AssertFailedBreak();
3608 }
3609#endif
3610
3611 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3612 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3613
3614 if (cbMask <= sizeof(uint32_t))
3615 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
3616 else
3617 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
3618
3619 iemNativeVarRegisterRelease(pReNative, idxVar);
3620 return off;
3621}
3622
3623
3624#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
3625 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3626
3627#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
3628 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3629
3630#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
3631 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3632
3633#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
3634 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3635
3636/** Emits code for OR'ing a local and a constant value. */
3637DECL_INLINE_THROW(uint32_t)
3638iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3639{
3640#ifdef VBOX_STRICT
3641 switch (cbMask)
3642 {
3643 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3644 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3645 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3646 case sizeof(uint64_t): break;
3647 default: AssertFailedBreak();
3648 }
3649#endif
3650
3651 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3652 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3653
3654 if (cbMask <= sizeof(uint32_t))
3655 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
3656 else
3657 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
3658
3659 iemNativeVarRegisterRelease(pReNative, idxVar);
3660 return off;
3661}
3662
3663
3664#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
3665 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
3666
3667#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
3668 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
3669
3670#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
3671 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
3672
3673/** Emits code for reversing the byte order in a local value. */
3674DECL_INLINE_THROW(uint32_t)
3675iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
3676{
3677 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3678 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3679
3680 switch (cbLocal)
3681 {
3682 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
3683 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
3684 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
3685 default: AssertFailedBreak();
3686 }
3687
3688 iemNativeVarRegisterRelease(pReNative, idxVar);
3689 return off;
3690}
3691
3692
3693#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
3694 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
3695
3696#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
3697 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
3698
3699#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
3700 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
3701
3702/** Emits code for shifting left a local value. */
3703DECL_INLINE_THROW(uint32_t)
3704iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
3705{
3706#ifdef VBOX_STRICT
3707 switch (cbLocal)
3708 {
3709 case sizeof(uint8_t): Assert(cShift < 8); break;
3710 case sizeof(uint16_t): Assert(cShift < 16); break;
3711 case sizeof(uint32_t): Assert(cShift < 32); break;
3712 case sizeof(uint64_t): Assert(cShift < 64); break;
3713 default: AssertFailedBreak();
3714 }
3715#endif
3716
3717 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3718 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3719
3720 if (cbLocal <= sizeof(uint32_t))
3721 {
3722 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
3723 if (cbLocal < sizeof(uint32_t))
3724 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
3725 cbLocal == sizeof(uint16_t)
3726 ? UINT32_C(0xffff)
3727 : UINT32_C(0xff));
3728 }
3729 else
3730 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
3731
3732 iemNativeVarRegisterRelease(pReNative, idxVar);
3733 return off;
3734}
3735
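/*
 * Worked example (illustrative): shifting a 16-bit local holding 0x9000 left by 4 is
 * done as a 32-bit shift (giving 0x00090000) followed by the AND with 0xffff above, so
 * the local ends up as 0x0000 - matching what a real 16-bit shift leaves in the low word.
 */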
3736
3737#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
3738 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
3739
3740#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
3741 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
3742
3743#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
3744 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
3745
3746/** Emits code for arithmetically shifting right a local value. */
3747DECL_INLINE_THROW(uint32_t)
3748iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
3749{
3750#ifdef VBOX_STRICT
3751 switch (cbLocal)
3752 {
3753 case sizeof(int8_t): Assert(cShift < 8); break;
3754 case sizeof(int16_t): Assert(cShift < 16); break;
3755 case sizeof(int32_t): Assert(cShift < 32); break;
3756 case sizeof(int64_t): Assert(cShift < 64); break;
3757 default: AssertFailedBreak();
3758 }
3759#endif
3760
3761 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3762 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3763
3764 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
3765 if (cbLocal == sizeof(uint8_t))
3766 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3767 else if (cbLocal == sizeof(uint16_t))
3768 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
3769
3770 if (cbLocal <= sizeof(uint32_t))
3771 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
3772 else
3773 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
3774
3775 iemNativeVarRegisterRelease(pReNative, idxVar);
3776 return off;
3777}
3778
3779
3780#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
3781 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
3782
3783#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
3784 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
3785
3786#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
3787 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
3788
3789/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
3790DECL_INLINE_THROW(uint32_t)
3791iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
3792{
3793 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
3794 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
3795 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3796 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3797
3798 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3799 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
3800
3801 /* Need to sign extend the value. */
3802 if (cbLocal <= sizeof(uint32_t))
3803 {
3804/** @todo ARM64: In case of boredom, the extended add instruction can do the
3805 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
3806 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
3807
3808 switch (cbLocal)
3809 {
3810 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
3811 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
3812 default: AssertFailed();
3813 }
3814
3815 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
3816 iemNativeRegFreeTmp(pReNative, idxRegTmp);
3817 }
3818 else
3819 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
3820
3821 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
3822 iemNativeVarRegisterRelease(pReNative, idxVar);
3823 return off;
3824}
3825
3826
3827
3828/*********************************************************************************************************************************
3829* EFLAGS *
3830*********************************************************************************************************************************/
3831
3832#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
3833# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
3834#else
3835# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
3836 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
3837
3838DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
3839{
3840 if (fEflOutput)
3841 {
3842 PVMCPUCC const pVCpu = pReNative->pVCpu;
3843# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3844 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
3845 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
3846 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
3847# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
3848 if (fEflOutput & (a_fEfl)) \
3849 { \
3850 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
3851 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
3852 else \
3853 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
3854 } else do { } while (0)
3855# else
3856 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
3857 IEMLIVENESSBIT const LivenessClobbered =
3858 {
3859 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3860 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3861 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
3862 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
3863 };
3864 IEMLIVENESSBIT const LivenessDelayable =
3865 {
3866 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3867 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
3868 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3869 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
3870 };
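        /* In short: a flag that is written without first being read or otherwise
           needed counts as clobbered (skippable), one whose only remaining use is a
           potential exception or helper call counts as delayable, and anything else
           is required; see the IEMLIVENESS_BIT_* definitions for the exact semantics. */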
3871# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
3872 if (fEflOutput & (a_fEfl)) \
3873 { \
3874 if (LivenessClobbered.a_fLivenessMember) \
3875 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
3876 else if (LivenessDelayable.a_fLivenessMember) \
3877 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
3878 else \
3879 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
3880 } else do { } while (0)
3881# endif
3882 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
3883 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
3884 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
3885 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
3886 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
3887 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
3888 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
3889# undef CHECK_FLAG_AND_UPDATE_STATS
3890 }
3891 RT_NOREF(fEflInput);
3892}
3893#endif /* !VBOX_WITH_STATISTICS || !IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3894
3895#undef IEM_MC_FETCH_EFLAGS /* should not be used */
3896#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
3897 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
3898
3899/** Handles IEM_MC_FETCH_EFLAGS_EX. */
3900DECL_INLINE_THROW(uint32_t)
3901iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
3902 uint32_t fEflInput, uint32_t fEflOutput)
3903{
3904 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
3905 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
3906 RT_NOREF(fEflInput, fEflOutput);
3907
3908#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3909# ifdef VBOX_STRICT
3910 if ( pReNative->idxCurCall != 0
3911 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
3912 {
3913 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
3914 uint32_t const fBoth = fEflInput | fEflOutput;
3915# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
3916 AssertMsg( !(fBoth & (a_fElfConst)) \
3917 || (!(fEflInput & (a_fElfConst)) \
3918 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
3919 : !(fEflOutput & (a_fElfConst)) \
3920 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
3921 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
3922 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
3923 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
3924 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
3925 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
3926 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
3927 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
3928 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
3929 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
3930# undef ASSERT_ONE_EFL
3931 }
3932# endif
3933#endif
3934
3935 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
3936
3937 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
3938 * the existing shadow copy. */
3939 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
3940 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
3941 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
3942 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
3943 return off;
3944}
3945
3946
3947
3948/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
3949 * start using it with custom native code emission (inlining assembly
3950 * instruction helpers). */
3951#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
3952#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
3953 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
3954 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
3955
3956#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
3957#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
3958 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
3959 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
3960
3961/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
3962DECL_INLINE_THROW(uint32_t)
3963iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
3964 bool fUpdateSkipping)
3965{
3966 RT_NOREF(fEflOutput);
3967 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
3968 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
3969
3970#ifdef VBOX_STRICT
3971 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
3972 uint32_t offFixup = off;
3973 off = iemNativeEmitJnzToFixed(pReNative, off, off);
3974 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
3975 iemNativeFixupFixedJump(pReNative, offFixup, off);
3976
3977 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
3978 offFixup = off;
3979 off = iemNativeEmitJzToFixed(pReNative, off, off);
3980 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
3981 iemNativeFixupFixedJump(pReNative, offFixup, off);
3982
3983 /** @todo validate that only bits in the fEflOutput mask changed. */
3984#endif
3985
3986#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
3987 if (fUpdateSkipping)
3988 {
3989 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
3990 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
3991 else
3992 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
3993 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
3994 }
3995#else
3996 RT_NOREF_PV(fUpdateSkipping);
3997#endif
3998
3999 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
4000 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
4001 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
4002 return off;
4003}
4004
4005
4006
4007/*********************************************************************************************************************************
4008* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
4009*********************************************************************************************************************************/
4010
4011#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
4012 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
4013
4014#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
4015 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
4016
4017#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
4018 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
4019
4020
4021/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
4022 * IEM_MC_FETCH_SREG_ZX_U64. */
4023DECL_INLINE_THROW(uint32_t)
4024iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
4025{
4026 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4027 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
4028 Assert(iSReg < X86_SREG_COUNT);
4029
4030 /*
4031 * For now, we will not create a shadow copy of a selector. The rationale
4032 * is that since we do not recompile the popping and loading of segment
4033 * registers, and since the IEM_MC_FETCH_SREG_U* MCs are only used for
4034 * pushing and moving to registers, there is only a small chance that the
4035 * shadow copy will be accessed again before the register is reloaded. One
4036 * scenario would be nested calls in 16-bit code, but I doubt it's worth
4037 * the extra register pressure atm.
4038 *
4039 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
4040 * and iemNativeVarRegisterAcquire for a load scenario. We've only got the
4041 * store scenario covered at present (r160730).
4042 */
4043 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4044 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4045 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
4046 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4047 return off;
4048}
4049
4050
4051
4052/*********************************************************************************************************************************
4053* Register references. *
4054*********************************************************************************************************************************/
4055
4056#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
4057 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
4058
4059#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
4060 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
4061
4062/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
4063DECL_INLINE_THROW(uint32_t)
4064iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
4065{
4066 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
4067 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4068 Assert(iGRegEx < 20);
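    /* iGRegEx 0..15 are the ordinary byte registers, while 16..19 refer to the
       legacy high-byte registers AH, CH, DH and BH, hence the GprHighByte
       reference kind below; (iGRegEx & 15) yields the underlying GPR in both cases. */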
4069
4070 if (iGRegEx < 16)
4071 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
4072 else
4073 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
4074
4075 /* If we've delayed writing back the register value, flush it now. */
4076 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
4077
4078 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4079 if (!fConst)
4080 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
4081
4082 return off;
4083}
4084
4085#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
4086 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
4087
4088#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
4089 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
4090
4091#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
4092 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
4093
4094#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
4095 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
4096
4097#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
4098 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
4099
4100#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
4101 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
4102
4103#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
4104 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
4105
4106#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
4107 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
4108
4109#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
4110 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
4111
4112#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
4113 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
4114
4115/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
4116DECL_INLINE_THROW(uint32_t)
4117iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
4118{
4119 Assert(iGReg < 16);
4120 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
4121 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4122
4123 /* If we've delayed writing back the register value, flush it now. */
4124 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
4125
4126 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4127 if (!fConst)
4128 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
4129
4130 return off;
4131}
4132
4133
4134#undef IEM_MC_REF_EFLAGS /* should not be used. */
4135#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
4136 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
4137 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
4138
4139/** Handles IEM_MC_REF_EFLAGS. */
4140DECL_INLINE_THROW(uint32_t)
4141iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
4142{
4143 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
4144 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4145
4146#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
4147 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
4148
4149 /* Updating the skipping according to the outputs is a little early, but
4150 we don't have any other hooks for references atm. */
4151 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
4152 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4153 else if (fEflOutput & X86_EFL_STATUS_BITS)
4154 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
4155 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4156#else
4157 RT_NOREF(fEflInput, fEflOutput);
4158#endif
4159
4160 /* If we've delayed writing back the register value, flush it now. */
4161 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
4162
4163 /* If there is a shadow copy of guest EFLAGS, flush it now. */
4164 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
4165
4166 return off;
4167}
4168
4169
4170/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
4171 * different code from the threaded recompiler, maybe it would be helpful. For now
4172 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
4173#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
4174
4175
4176#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
4177 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
4178
4179#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
4180 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
4181
4182#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
4183 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
4184
4185#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4186/* Just being paranoid here. */
4187# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
4188AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
4189AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
4190AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
4191AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
4192# endif
4193AssertCompileMemberOffset(X86XMMREG, au64, 0);
4194AssertCompileMemberOffset(X86XMMREG, au32, 0);
4195AssertCompileMemberOffset(X86XMMREG, ar64, 0);
4196AssertCompileMemberOffset(X86XMMREG, ar32, 0);
4197
4198# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
4199 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
4200# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
4201 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
4202# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
4203 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
4204# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
4205 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
4206#endif
4207
4208/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
4209DECL_INLINE_THROW(uint32_t)
4210iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
4211{
4212 Assert(iXReg < 16);
4213 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
4214 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4215
4216 /* If we've delayed writing back the register value, flush it now. */
4217 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
4218
4219#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4220 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4221 if (!fConst)
4222 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
4223#else
4224 RT_NOREF(fConst);
4225#endif
4226
4227 return off;
4228}
4229
4230
4231#define IEM_MC_REF_MXCSR(a_pfMxcsr) \
4232 off = iemNativeEmitRefMxcsr(pReNative, off, a_pfMxcsr)
4233
4234/** Handles IEM_MC_REF_MXCSR. */
4235DECL_INLINE_THROW(uint32_t)
4236iemNativeEmitRefMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
4237{
4238 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_MxCsr, 0);
4239 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4240
4241 /* If we've delayed writing back the register value, flush it now. */
4242 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_MxCsr, 0);
4243
4244 /* If there is a shadow copy of guest MXCSR, flush it now. */
4245 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_MxCsr));
4246
4247 return off;
4248}
4249
4250
4251
4252/*********************************************************************************************************************************
4253* Effective Address Calculation *
4254*********************************************************************************************************************************/
4255#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
4256 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
4257
4258/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
4259 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
4260DECL_INLINE_THROW(uint32_t)
4261iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4262 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
4263{
4264 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4265
4266 /*
4267 * Handle the disp16 form with no registers first.
4268 *
4269 * Convert to an immediate value, as that'll delay the register allocation
4270 * and assignment till the memory access / call / whatever and we can use
4271 * a more appropriate register (or none at all).
4272 */
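    /* (mod=0, r/m=6 is the plain [disp16] form of 16-bit addressing, e.g.
       mov ax, [0x1234], so the effective address is just the displacement.) */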
4273 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
4274 {
4275 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
4276 return off;
4277 }
4278
4279 /* Determine the displacement. */
4280 uint16_t u16EffAddr;
4281 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4282 {
4283 case 0: u16EffAddr = 0; break;
4284 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
4285 case 2: u16EffAddr = u16Disp; break;
4286 default: AssertFailedStmt(u16EffAddr = 0);
4287 }
4288
4289 /* Determine the registers involved. */
4290 uint8_t idxGstRegBase;
4291 uint8_t idxGstRegIndex;
4292 switch (bRm & X86_MODRM_RM_MASK)
4293 {
4294 case 0:
4295 idxGstRegBase = X86_GREG_xBX;
4296 idxGstRegIndex = X86_GREG_xSI;
4297 break;
4298 case 1:
4299 idxGstRegBase = X86_GREG_xBX;
4300 idxGstRegIndex = X86_GREG_xDI;
4301 break;
4302 case 2:
4303 idxGstRegBase = X86_GREG_xBP;
4304 idxGstRegIndex = X86_GREG_xSI;
4305 break;
4306 case 3:
4307 idxGstRegBase = X86_GREG_xBP;
4308 idxGstRegIndex = X86_GREG_xDI;
4309 break;
4310 case 4:
4311 idxGstRegBase = X86_GREG_xSI;
4312 idxGstRegIndex = UINT8_MAX;
4313 break;
4314 case 5:
4315 idxGstRegBase = X86_GREG_xDI;
4316 idxGstRegIndex = UINT8_MAX;
4317 break;
4318 case 6:
4319 idxGstRegBase = X86_GREG_xBP;
4320 idxGstRegIndex = UINT8_MAX;
4321 break;
4322#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
4323 default:
4324#endif
4325 case 7:
4326 idxGstRegBase = X86_GREG_xBX;
4327 idxGstRegIndex = UINT8_MAX;
4328 break;
4329 }
4330
4331 /*
4332 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
4333 */
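    /* For instance, mod=1/r/m=0 is [bx+si+disp8]; a disp8 of 0xf0 arrives as
       u16Disp=0x00f0, becomes u16EffAddr=0xfff0 above, and the emitted code
       computes (uint16_t)(BX + SI - 0x10). */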
4334 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4335 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4336 kIemNativeGstRegUse_ReadOnly);
4337 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
4338 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4339 kIemNativeGstRegUse_ReadOnly)
4340 : UINT8_MAX;
4341#ifdef RT_ARCH_AMD64
4342 if (idxRegIndex == UINT8_MAX)
4343 {
4344 if (u16EffAddr == 0)
4345 {
4346 /* movzx ret, base */
4347 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
4348 }
4349 else
4350 {
4351 /* lea ret32, [base64 + disp32] */
4352 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4353 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4354 if (idxRegRet >= 8 || idxRegBase >= 8)
4355 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
4356 pbCodeBuf[off++] = 0x8d;
4357 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4358 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
4359 else
4360 {
4361 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
4362 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4363 }
4364 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
4365 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
4366 pbCodeBuf[off++] = 0;
4367 pbCodeBuf[off++] = 0;
4368 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4369
4370 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
4371 }
4372 }
4373 else
4374 {
4375 /* lea ret32, [index64 + base64 (+ disp32)] */
4376 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4377 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4378 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4379 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4380 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4381 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4382 pbCodeBuf[off++] = 0x8d;
4383 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
4384 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4385 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
4386 if (bMod == X86_MOD_MEM4)
4387 {
4388 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
4389 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
4390 pbCodeBuf[off++] = 0;
4391 pbCodeBuf[off++] = 0;
4392 }
4393 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4394 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
4395 }
4396
4397#elif defined(RT_ARCH_ARM64)
4398 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4399 if (u16EffAddr == 0)
4400 {
4401 if (idxRegIndex == UINT8_MAX)
4402 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
4403 else
4404 {
4405 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
4406 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
4407 }
4408 }
4409 else
4410 {
4411 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
4412 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
4413 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
4414 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
4415 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
4416 else
4417 {
4418 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
4419 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
4420 }
4421 if (idxRegIndex != UINT8_MAX)
4422 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
4423 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
4424 }
4425
4426#else
4427# error "port me"
4428#endif
4429
4430 if (idxRegIndex != UINT8_MAX)
4431 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4432 iemNativeRegFreeTmp(pReNative, idxRegBase);
4433 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4434 return off;
4435}
4436
4437
4438#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
4439 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
4440
4441/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
4442 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
4443DECL_INLINE_THROW(uint32_t)
4444iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4445 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
4446{
4447 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4448
4449 /*
4450 * Handle the disp32 form with no registers first.
4451 *
4452 * Convert to an immediate value, as that'll delay the register allocation
4453 * and assignment till the memory access / call / whatever and we can use
4454 * a more appropriate register (or none at all).
4455 */
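    /* (mod=0, r/m=5 is the plain [disp32] form of 32-bit addressing, e.g.
       mov eax, [0x12345678].) */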
4456 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4457 {
4458 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
4459 return off;
4460 }
4461
4462 /* Calculate the fixed displacement (more on this below for SIB.B=4 and SIB.B=5). */
4463 uint32_t u32EffAddr = 0;
4464 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4465 {
4466 case 0: break;
4467 case 1: u32EffAddr = (int8_t)u32Disp; break;
4468 case 2: u32EffAddr = u32Disp; break;
4469 default: AssertFailed();
4470 }
4471
4472 /* Get the register (or SIB) value. */
4473 uint8_t idxGstRegBase = UINT8_MAX;
4474 uint8_t idxGstRegIndex = UINT8_MAX;
4475 uint8_t cShiftIndex = 0;
4476 switch (bRm & X86_MODRM_RM_MASK)
4477 {
4478 case 0: idxGstRegBase = X86_GREG_xAX; break;
4479 case 1: idxGstRegBase = X86_GREG_xCX; break;
4480 case 2: idxGstRegBase = X86_GREG_xDX; break;
4481 case 3: idxGstRegBase = X86_GREG_xBX; break;
4482 case 4: /* SIB */
4483 {
4484 /* index w/ scaling. */
4485 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4486 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4487 {
4488 case 0: idxGstRegIndex = X86_GREG_xAX; break;
4489 case 1: idxGstRegIndex = X86_GREG_xCX; break;
4490 case 2: idxGstRegIndex = X86_GREG_xDX; break;
4491 case 3: idxGstRegIndex = X86_GREG_xBX; break;
4492 case 4: cShiftIndex = 0; /*no index*/ break;
4493 case 5: idxGstRegIndex = X86_GREG_xBP; break;
4494 case 6: idxGstRegIndex = X86_GREG_xSI; break;
4495 case 7: idxGstRegIndex = X86_GREG_xDI; break;
4496 }
4497
4498 /* base */
4499 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
4500 {
4501 case 0: idxGstRegBase = X86_GREG_xAX; break;
4502 case 1: idxGstRegBase = X86_GREG_xCX; break;
4503 case 2: idxGstRegBase = X86_GREG_xDX; break;
4504 case 3: idxGstRegBase = X86_GREG_xBX; break;
4505 case 4:
4506 idxGstRegBase = X86_GREG_xSP;
4507 u32EffAddr += uSibAndRspOffset >> 8;
4508 break;
4509 case 5:
4510 if ((bRm & X86_MODRM_MOD_MASK) != 0)
4511 idxGstRegBase = X86_GREG_xBP;
4512 else
4513 {
4514 Assert(u32EffAddr == 0);
4515 u32EffAddr = u32Disp;
4516 }
4517 break;
4518 case 6: idxGstRegBase = X86_GREG_xSI; break;
4519 case 7: idxGstRegBase = X86_GREG_xDI; break;
4520 }
4521 break;
4522 }
4523 case 5: idxGstRegBase = X86_GREG_xBP; break;
4524 case 6: idxGstRegBase = X86_GREG_xSI; break;
4525 case 7: idxGstRegBase = X86_GREG_xDI; break;
4526 }
4527
4528 /*
4529 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
4530 * the start of the function.
4531 */
4532 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4533 {
4534 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
4535 return off;
4536 }
4537
4538 /*
4539 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4540 */
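    /* E.g. [eax + ecx*4 + 8] arrives here as idxGstRegBase=xAX, idxGstRegIndex=xCX,
       cShiftIndex=2 and u32EffAddr=8. */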
4541 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4542 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
4543 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4544 kIemNativeGstRegUse_ReadOnly);
4545 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
4546 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4547 kIemNativeGstRegUse_ReadOnly);
4548
4549 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
4550 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
4551 {
4552 idxRegBase = idxRegIndex;
4553 idxRegIndex = UINT8_MAX;
4554 }
4555
4556#ifdef RT_ARCH_AMD64
4557 if (idxRegIndex == UINT8_MAX)
4558 {
4559 if (u32EffAddr == 0)
4560 {
4561 /* mov ret, base */
4562 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4563 }
4564 else
4565 {
4566 /* lea ret32, [base64 + disp32] */
4567 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4568 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4569 if (idxRegRet >= 8 || idxRegBase >= 8)
4570 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
4571 pbCodeBuf[off++] = 0x8d;
4572 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4573 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4574 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
4575 else
4576 {
4577 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4578 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4579 }
4580 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4581 if (bMod == X86_MOD_MEM4)
4582 {
4583 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4584 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4585 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4586 }
4587 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4588 }
4589 }
4590 else
4591 {
4592 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4593 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4594 if (idxRegBase == UINT8_MAX)
4595 {
4596 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
4597 if (idxRegRet >= 8 || idxRegIndex >= 8)
4598 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4599 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4600 pbCodeBuf[off++] = 0x8d;
4601 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
4602 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
4603 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4604 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4605 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4606 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4607 }
4608 else
4609 {
4610 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
4611 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4612 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4613 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4614 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4615 pbCodeBuf[off++] = 0x8d;
4616 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
4617 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4618 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4619 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
4620 if (bMod != X86_MOD_MEM0)
4621 {
4622 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4623 if (bMod == X86_MOD_MEM4)
4624 {
4625 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4626 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4627 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4628 }
4629 }
4630 }
4631 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4632 }
4633
4634#elif defined(RT_ARCH_ARM64)
4635 if (u32EffAddr == 0)
4636 {
4637 if (idxRegIndex == UINT8_MAX)
4638 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4639 else if (idxRegBase == UINT8_MAX)
4640 {
4641 if (cShiftIndex == 0)
4642 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
4643 else
4644 {
4645 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4646 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
4647 }
4648 }
4649 else
4650 {
4651 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4652 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
4653 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4654 }
4655 }
4656 else
4657 {
4658 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
4659 {
4660 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4661 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
4662 }
4663 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
4664 {
4665 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4666 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
4667 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
4668 }
4669 else
4670 {
4671 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
4672 if (idxRegBase != UINT8_MAX)
4673 {
4674 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4675 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
4676 }
4677 }
4678 if (idxRegIndex != UINT8_MAX)
4679 {
4680 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4681 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
4682 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4683 }
4684 }
4685
4686#else
4687# error "port me"
4688#endif
4689
4690 if (idxRegIndex != UINT8_MAX)
4691 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4692 if (idxRegBase != UINT8_MAX)
4693 iemNativeRegFreeTmp(pReNative, idxRegBase);
4694 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4695 return off;
4696}
4697
4698
4699#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4700 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4701 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4702
4703#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4704 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4705 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4706
4707#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4708 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4709 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
4710
4711/**
4712 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
4713 *
4714 * @returns New off.
4715 * @param pReNative The native recompiler state.
4716 * @param off The current code buffer offset.
4717 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
4718 * bit 4 to REX.X. The two bits are part of the
4719 * REG sub-field, which isn't needed in this
4720 * function.
4721 * @param uSibAndRspOffset Two parts:
4722 * - The first 8 bits make up the SIB byte.
4723 * - The next 8 bits are the fixed RSP/ESP offset
4724 * in case of a pop [xSP].
4725 * @param u32Disp The displacement byte/word/dword, if any.
4726 * @param cbInstr The size of the fully decoded instruction. Used
4727 * for RIP relative addressing.
4728 * @param idxVarRet The result variable number.
4729 * @param f64Bit Whether to use a 64-bit or 32-bit address size
4730 * when calculating the address.
4731 *
4732 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
4733 */
4734DECL_INLINE_THROW(uint32_t)
4735iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
4736 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
4737{
4738 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4739
4740 /*
4741 * Special case the rip + disp32 form first.
4742 */
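    /* In 64-bit code mod=0/r/m=5 means RIP-relative addressing: the effective
       address is the RIP of the *next* instruction plus disp32, which is why
       cbInstr is added to the displacement below (e.g. lea rax, [rip + 0x100]). */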
4743 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4744 {
4745#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
4746 /* Need to take the current PC offset into account for the displacement; no need to flush here
4747 * as the PC is only accessed read-only and there is no branching or calling of helpers involved. */
4748 u32Disp += pReNative->Core.offPc;
4749#endif
4750
4751 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4752 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
4753 kIemNativeGstRegUse_ReadOnly);
4754#ifdef RT_ARCH_AMD64
4755 if (f64Bit)
4756 {
4757 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
4758 if ((int32_t)offFinalDisp == offFinalDisp)
4759 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
4760 else
4761 {
4762 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
4763 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
4764 }
4765 }
4766 else
4767 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
4768
4769#elif defined(RT_ARCH_ARM64)
4770 if (f64Bit)
4771 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4772 (int64_t)(int32_t)u32Disp + cbInstr);
4773 else
4774 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4775 (int32_t)u32Disp + cbInstr);
4776
4777#else
4778# error "Port me!"
4779#endif
4780 iemNativeRegFreeTmp(pReNative, idxRegPc);
4781 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4782 return off;
4783 }
4784
4785 /* Calculate the fixed displacement (more on this below for SIB.B=4 and SIB.B=5). */
4786 int64_t i64EffAddr = 0;
4787 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4788 {
4789 case 0: break;
4790 case 1: i64EffAddr = (int8_t)u32Disp; break;
4791 case 2: i64EffAddr = (int32_t)u32Disp; break;
4792 default: AssertFailed();
4793 }
4794
4795 /* Get the register (or SIB) value. */
4796 uint8_t idxGstRegBase = UINT8_MAX;
4797 uint8_t idxGstRegIndex = UINT8_MAX;
4798 uint8_t cShiftIndex = 0;
4799 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
4800 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
4801 else /* SIB: */
4802 {
4803 /* index w/ scaling. */
4804 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4805 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4806 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
4807 if (idxGstRegIndex == 4)
4808 {
4809 /* no index */
4810 cShiftIndex = 0;
4811 idxGstRegIndex = UINT8_MAX;
4812 }
4813
4814 /* base */
4815 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
4816 if (idxGstRegBase == 4)
4817 {
4818 /* pop [rsp] hack */
4819 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
4820 }
4821 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
4822 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
4823 {
4824 /* mod=0 and base=5 -> disp32, no base reg. */
4825 Assert(i64EffAddr == 0);
4826 i64EffAddr = (int32_t)u32Disp;
4827 idxGstRegBase = UINT8_MAX;
4828 }
4829 }
4830
4831 /*
4832 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
4833 * the start of the function.
4834 */
4835 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4836 {
4837 if (f64Bit)
4838 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
4839 else
4840 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
4841 return off;
4842 }
4843
4844 /*
4845 * Now emit code that calculates:
4846 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4847 * or if !f64Bit:
4848 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4849 */
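    /* E.g. [r8 + rcx*8 - 0x20] arrives here as idxGstRegBase=8 (REX.B set in bRmEx),
       idxGstRegIndex=xCX, cShiftIndex=3 and i64EffAddr=-0x20. */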
4850 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4851 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
4852 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4853 kIemNativeGstRegUse_ReadOnly);
4854 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
4855 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4856 kIemNativeGstRegUse_ReadOnly);
4857
4858 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
4859 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
4860 {
4861 idxRegBase = idxRegIndex;
4862 idxRegIndex = UINT8_MAX;
4863 }
4864
4865#ifdef RT_ARCH_AMD64
4866 uint8_t bFinalAdj;
4867 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
4868 bFinalAdj = 0; /* likely */
4869 else
4870 {
4871 /* pop [rsp] with a problematic disp32 value. Split out the
4872 RSP offset and add it separately afterwards (bFinalAdj). */
4873 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
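        /* E.g. pop qword [rsp + 0x7ffffffc]: adding the pop's +8 RSP adjustment
           yields 0x80000004, which no longer fits a signed 32-bit LEA displacement,
           so the LEA uses 0x7ffffffc and the 8 is added back afterwards. */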
4874 Assert(idxGstRegBase == X86_GREG_xSP);
4875 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
4876 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
4877 Assert(bFinalAdj != 0);
4878 i64EffAddr -= bFinalAdj;
4879 Assert((int32_t)i64EffAddr == i64EffAddr);
4880 }
4881 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
4882//pReNative->pInstrBuf[off++] = 0xcc;
4883
4884 if (idxRegIndex == UINT8_MAX)
4885 {
4886 if (u32EffAddr == 0)
4887 {
4888 /* mov ret, base */
4889 if (f64Bit)
4890 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
4891 else
4892 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4893 }
4894 else
4895 {
4896 /* lea ret, [base + disp32] */
4897 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4898 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4899 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
4900 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4901 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4902 | (f64Bit ? X86_OP_REX_W : 0);
4903 pbCodeBuf[off++] = 0x8d;
4904 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4905 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4906 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
4907 else
4908 {
4909 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4910 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4911 }
4912 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4913 if (bMod == X86_MOD_MEM4)
4914 {
4915 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4916 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4917 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4918 }
4919 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4920 }
4921 }
4922 else
4923 {
4924 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4925 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4926 if (idxRegBase == UINT8_MAX)
4927 {
4928 /* lea ret, [(index64 << cShiftIndex) + disp32] */
4929 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
4930 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4931 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
4932 | (f64Bit ? X86_OP_REX_W : 0);
4933 pbCodeBuf[off++] = 0x8d;
4934 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
4935 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
4936 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4937 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4938 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4939 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4940 }
4941 else
4942 {
4943 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
4944 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4945 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4946 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4947 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
4948 | (f64Bit ? X86_OP_REX_W : 0);
4949 pbCodeBuf[off++] = 0x8d;
4950 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
4951 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4952 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4953 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
4954 if (bMod != X86_MOD_MEM0)
4955 {
4956 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4957 if (bMod == X86_MOD_MEM4)
4958 {
4959 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4960 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4961 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4962 }
4963 }
4964 }
4965 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4966 }
4967
4968 if (!bFinalAdj)
4969 { /* likely */ }
4970 else
4971 {
4972 Assert(f64Bit);
4973 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
4974 }
4975
4976#elif defined(RT_ARCH_ARM64)
4977 if (i64EffAddr == 0)
4978 {
4979 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4980 if (idxRegIndex == UINT8_MAX)
4981 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
4982 else if (idxRegBase != UINT8_MAX)
4983 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
4984 f64Bit, false /*fSetFlags*/, cShiftIndex);
4985 else
4986 {
4987 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
4988 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
4989 }
4990 }
4991 else
4992 {
4993 if (f64Bit)
4994 { /* likely */ }
4995 else
4996 i64EffAddr = (int32_t)i64EffAddr;
4997
4998 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
4999 {
5000 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5001 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
5002 }
5003 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
5004 {
5005 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5006 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
5007 }
5008 else
5009 {
5010 if (f64Bit)
5011 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
5012 else
5013 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
5014 if (idxRegBase != UINT8_MAX)
5015 {
5016 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5017 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
5018 }
5019 }
5020 if (idxRegIndex != UINT8_MAX)
5021 {
5022 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5023 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
5024 f64Bit, false /*fSetFlags*/, cShiftIndex);
5025 }
5026 }
5027
5028#else
5029# error "port me"
5030#endif
5031
5032 if (idxRegIndex != UINT8_MAX)
5033 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5034 if (idxRegBase != UINT8_MAX)
5035 iemNativeRegFreeTmp(pReNative, idxRegBase);
5036 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5037 return off;
5038}
5039
5040
5041/*********************************************************************************************************************************
5042* Memory fetches and stores common *
5043*********************************************************************************************************************************/
5044
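/** Memory operation kinds for the common fetch/store emitter below: a plain
 * store, a plain fetch, or a fetch that zero- (Zx) / sign-extends (Sx) the
 * loaded value to the wider destination type given by the suffix. */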
5045typedef enum IEMNATIVEMITMEMOP
5046{
5047 kIemNativeEmitMemOp_Store = 0,
5048 kIemNativeEmitMemOp_Fetch,
5049 kIemNativeEmitMemOp_Fetch_Zx_U16,
5050 kIemNativeEmitMemOp_Fetch_Zx_U32,
5051 kIemNativeEmitMemOp_Fetch_Zx_U64,
5052 kIemNativeEmitMemOp_Fetch_Sx_U16,
5053 kIemNativeEmitMemOp_Fetch_Sx_U32,
5054 kIemNativeEmitMemOp_Fetch_Sx_U64
5055} IEMNATIVEMITMEMOP;
5056
5057/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
5058 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
5059 * (with iSegReg = UINT8_MAX). */
5060DECL_INLINE_THROW(uint32_t)
5061iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
5062 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
5063 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
5064{
5065 /*
5066 * Assert sanity.
5067 */
5068 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
5069 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
5070 Assert( enmOp != kIemNativeEmitMemOp_Store
5071 || pVarValue->enmKind == kIemNativeVarKind_Immediate
5072 || pVarValue->enmKind == kIemNativeVarKind_Stack);
5073 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
5074 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
5075 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
5076 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
5077 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5078 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
5079#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5080 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
5081 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
5082#else
5083 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
5084#endif
5085 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
5086#ifdef VBOX_STRICT
5087 if (iSegReg == UINT8_MAX)
5088 {
5089 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
5090 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
5091 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
5092 switch (cbMem)
5093 {
5094 case 1:
5095 Assert( pfnFunction
5096 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
5097 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5098 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5099 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5100 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5101 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
5102 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
5103 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
5104 : UINT64_C(0xc000b000a0009000) ));
5105 break;
5106 case 2:
5107 Assert( pfnFunction
5108 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
5109 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5110 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5111 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5112 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
5113 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
5114 : UINT64_C(0xc000b000a0009000) ));
5115 break;
5116 case 4:
5117 Assert( pfnFunction
5118 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
5119 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
5120 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
5121 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
5122 : UINT64_C(0xc000b000a0009000) ));
5123 break;
5124 case 8:
5125 Assert( pfnFunction
5126 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
5127 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
5128 : UINT64_C(0xc000b000a0009000) ));
5129 break;
5130#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5131 case sizeof(RTUINT128U):
5132 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5133 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
5134 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
5135 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
5136 || ( enmOp == kIemNativeEmitMemOp_Store
5137 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
5138 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
5139 break;
5140 case sizeof(RTUINT256U):
5141 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5142 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
5143 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
5144 || ( enmOp == kIemNativeEmitMemOp_Store
5145 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
5146 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
5147 break;
5148#endif
5149 }
5150 }
5151 else
5152 {
5153 Assert(iSegReg < 6);
5154 switch (cbMem)
5155 {
5156 case 1:
5157 Assert( pfnFunction
5158 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
5159 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
5160 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5161 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5162 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5163 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
5164 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
5165 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
5166 : UINT64_C(0xc000b000a0009000) ));
5167 break;
5168 case 2:
5169 Assert( pfnFunction
5170 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
5171 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
5172 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
5173 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
5174 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
5175 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
5176 : UINT64_C(0xc000b000a0009000) ));
5177 break;
5178 case 4:
5179 Assert( pfnFunction
5180 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
5181 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
5182 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
5183 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
5184 : UINT64_C(0xc000b000a0009000) ));
5185 break;
5186 case 8:
5187 Assert( pfnFunction
5188 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
5189 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
5190 : UINT64_C(0xc000b000a0009000) ));
5191 break;
5192#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5193 case sizeof(RTUINT128U):
5194 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5195 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
5196 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
5197 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
5198 || ( enmOp == kIemNativeEmitMemOp_Store
5199 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
5200 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
5201 break;
5202 case sizeof(RTUINT256U):
5203 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5204 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
5205 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
5206 || ( enmOp == kIemNativeEmitMemOp_Store
5207 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
5208 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
5209 break;
5210#endif
5211 }
5212 }
5213#endif
5214
5215#ifdef VBOX_STRICT
5216 /*
5217 * Check that the fExec flags we've got make sense.
5218 */
5219 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
5220#endif
5221
5222 /*
5223 * To keep things simple we have to commit any pending writes first as we
5224 * may end up making calls.
5225 */
5226 /** @todo we could postpone this till we make the call and reload the
5227 * registers after returning from the call. Not sure if that's sensible or
5228 * not, though. */
5229#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5230 off = iemNativeRegFlushPendingWrites(pReNative, off);
5231#else
5232 /* The program counter is treated differently for now. */
5233 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
5234#endif
5235
5236#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5237 /*
5238 * Move/spill/flush stuff out of call-volatile registers.
5239 * This is the easy way out. We could contain this to the tlb-miss branch
5240 * by saving and restoring active stuff here.
5241 */
5242 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
5243#endif
5244
5245 /*
5246 * Define labels and allocate the result register (trying for the return
5247 * register if we can).
5248 */
5249 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
5250#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5251 uint8_t idxRegValueFetch = UINT8_MAX;
5252
5253 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5254 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5255 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
5256 else
5257 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5258 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
5259 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
5260 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
5261#else
5262 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5263 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
5264 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
5265 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
5266#endif
5267 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
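    /* Note: When TlbState.fSkip is set no inline TLB lookup is emitted and the code below
             goes straight to the TlbMiss helper call. */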
5268
5269#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5270 uint8_t idxRegValueStore = UINT8_MAX;
5271
5272 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5273 idxRegValueStore = !TlbState.fSkip
5274 && enmOp == kIemNativeEmitMemOp_Store
5275 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5276 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5277 : UINT8_MAX;
5278 else
5279 idxRegValueStore = !TlbState.fSkip
5280 && enmOp == kIemNativeEmitMemOp_Store
5281 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5282 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5283 : UINT8_MAX;
5284
5285#else
5286 uint8_t const idxRegValueStore = !TlbState.fSkip
5287 && enmOp == kIemNativeEmitMemOp_Store
5288 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5289 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5290 : UINT8_MAX;
5291#endif
5292    uint8_t  const idxRegMemResult   = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
5293 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
5294 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
5295 : UINT32_MAX;
5296
5297 /*
5298 * Jump to the TLB lookup code.
5299 */
5300 if (!TlbState.fSkip)
5301 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
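    /* Note: The TlbMiss code is emitted inline below; the TlbLookup code and the actual
             load/store follow after it, and both paths meet again at the TlbDone label. */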
5302
5303 /*
5304 * TlbMiss:
5305 *
5306 * Call helper to do the fetching.
5307 * We flush all guest register shadow copies here.
5308 */
5309 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
5310
5311#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5312 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5313#else
5314 RT_NOREF(idxInstr);
5315#endif
5316
5317#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5318 if (pReNative->Core.offPc)
5319 {
5320 /*
5321 * Update the program counter but restore it at the end of the TlbMiss branch.
5322 * This should allow delaying more program counter updates for the TlbLookup and hit paths
5323 * which are hopefully much more frequent, reducing the amount of memory accesses.
5324 */
5325 /* Allocate a temporary PC register. */
5326 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5327
5328 /* Perform the addition and store the result. */
5329 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5330 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5331
5332 /* Free and flush the PC register. */
5333 iemNativeRegFreeTmp(pReNative, idxPcReg);
5334 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
5335 }
5336#endif
5337
5338#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5339 /* Save variables in volatile registers. */
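    /* Registers excluded below either are TLB-lookup temporaries not needed on this miss
       path or will be (re)loaded after the helper call (e.g. the fetch result register). */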
5340 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
5341 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
5342 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
5343 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
5344#endif
5345
5346 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
5347 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
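    /* fVolGregMask tracks which call-volatile GPRs may still be clobbered while loading the
       remaining arguments; a bit is cleared once that register holds a live argument. */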
5348#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5349 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5350 {
5351 /*
5352 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
5353 *
5354         * @note A host register was already assigned to the variable for the TlbLookup case above. It must
5355         *       not be freed here, or the value loaded into that register will not be synced back later
5356         *       on because the variable would no longer know it has a register assigned.
5357 *
5358 * @note For loads it is not required to sync what is in the assigned register with the stack slot
5359 * as it will be overwritten anyway.
5360 */
5361 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
5362 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
5363 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
5364 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
5365 }
5366 else
5367#endif
5368 if (enmOp == kIemNativeEmitMemOp_Store)
5369 {
5370 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
5371        off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*offAddend*/,
5372#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5373 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5374#else
5375 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
5376 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
5377#endif
5378 }
5379
5380 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
5381    off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*offAddend*/,
5382#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5383 fVolGregMask);
5384#else
5385 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
5386#endif
5387
5388 if (iSegReg != UINT8_MAX)
5389 {
5390 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
5391 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
5392 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
5393 }
5394
5395 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
5396 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5397
5398 /* Done setting up parameters, make the call. */
5399 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
5400
5401 /*
5402 * Put the result in the right register if this is a fetch.
5403 */
5404 if (enmOp != kIemNativeEmitMemOp_Store)
5405 {
5406#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5407 if ( cbMem == sizeof(RTUINT128U)
5408 || cbMem == sizeof(RTUINT256U))
5409 {
5410 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
5411
5412 /* Sync the value on the stack with the host register assigned to the variable. */
5413 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
5414 }
5415 else
5416#endif
5417 {
5418 Assert(idxRegValueFetch == pVarValue->idxReg);
5419 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
5420 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
5421 }
5422 }
5423
5424#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5425 /* Restore variables and guest shadow registers to volatile registers. */
5426 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
5427 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
5428#endif
5429
5430#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5431 if (pReNative->Core.offPc)
5432 {
5433 /*
5434 * Time to restore the program counter to its original value.
5435 */
5436 /* Allocate a temporary PC register. */
5437 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5438
5439 /* Restore the original value. */
5440 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5441 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5442
5443 /* Free and flush the PC register. */
5444 iemNativeRegFreeTmp(pReNative, idxPcReg);
5445 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
5446 }
5447#endif
5448
5449#ifdef IEMNATIVE_WITH_TLB_LOOKUP
5450 if (!TlbState.fSkip)
5451 {
5452 /* end of TlbMiss - Jump to the done label. */
5453 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
5454 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
5455
5456 /*
5457 * TlbLookup:
5458 */
5459 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
5460 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
5461 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
5462
5463 /*
5464 * Emit code to do the actual storing / fetching.
5465 */
5466 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
5467# ifdef VBOX_WITH_STATISTICS
5468 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
5469 enmOp == kIemNativeEmitMemOp_Store
5470                                                  ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
5471                                                  : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
5472# endif
5473 switch (enmOp)
5474 {
5475 case kIemNativeEmitMemOp_Store:
5476 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
5477 {
5478 switch (cbMem)
5479 {
5480 case 1:
5481 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5482 break;
5483 case 2:
5484 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5485 break;
5486 case 4:
5487 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5488 break;
5489 case 8:
5490 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5491 break;
5492#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5493 case sizeof(RTUINT128U):
5494 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5495 break;
5496 case sizeof(RTUINT256U):
5497 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5498 break;
5499#endif
5500 default:
5501 AssertFailed();
5502 }
5503 }
5504 else
5505 {
5506 switch (cbMem)
5507 {
5508 case 1:
5509 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
5510 idxRegMemResult, TlbState.idxReg1);
5511 break;
5512 case 2:
5513 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
5514 idxRegMemResult, TlbState.idxReg1);
5515 break;
5516 case 4:
5517 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
5518 idxRegMemResult, TlbState.idxReg1);
5519 break;
5520 case 8:
5521 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
5522 idxRegMemResult, TlbState.idxReg1);
5523 break;
5524 default:
5525 AssertFailed();
5526 }
5527 }
5528 break;
5529
5530 case kIemNativeEmitMemOp_Fetch:
5531 case kIemNativeEmitMemOp_Fetch_Zx_U16:
5532 case kIemNativeEmitMemOp_Fetch_Zx_U32:
5533 case kIemNativeEmitMemOp_Fetch_Zx_U64:
5534 switch (cbMem)
5535 {
5536 case 1:
5537 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5538 break;
5539 case 2:
5540 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5541 break;
5542 case 4:
5543 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5544 break;
5545 case 8:
5546 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5547 break;
5548#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5549 case sizeof(RTUINT128U):
5550 /*
5551 * No need to sync back the register with the stack, this is done by the generic variable handling
5552 * code if there is a register assigned to a variable and the stack must be accessed.
5553 */
5554 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5555 break;
5556 case sizeof(RTUINT256U):
5557 /*
5558 * No need to sync back the register with the stack, this is done by the generic variable handling
5559 * code if there is a register assigned to a variable and the stack must be accessed.
5560 */
5561 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5562 break;
5563#endif
5564 default:
5565 AssertFailed();
5566 }
5567 break;
5568
5569 case kIemNativeEmitMemOp_Fetch_Sx_U16:
5570 Assert(cbMem == 1);
5571 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5572 break;
5573
5574 case kIemNativeEmitMemOp_Fetch_Sx_U32:
5575 Assert(cbMem == 1 || cbMem == 2);
5576 if (cbMem == 1)
5577 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5578 else
5579 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5580 break;
5581
5582 case kIemNativeEmitMemOp_Fetch_Sx_U64:
5583 switch (cbMem)
5584 {
5585 case 1:
5586 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5587 break;
5588 case 2:
5589 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5590 break;
5591 case 4:
5592 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5593 break;
5594 default:
5595 AssertFailed();
5596 }
5597 break;
5598
5599 default:
5600 AssertFailed();
5601 }
5602
5603 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
5604
5605 /*
5606 * TlbDone:
5607 */
5608 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
5609
5610 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
5611
5612# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5613 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
5614 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5615# endif
5616 }
5617#else
5618 RT_NOREF(fAlignMask, idxLabelTlbMiss);
5619#endif
5620
5621 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
5622 iemNativeVarRegisterRelease(pReNative, idxVarValue);
5623 return off;
5624}
5625
5626
5627
5628/*********************************************************************************************************************************
5629* Memory fetches (IEM_MEM_FETCH_XXX). *
5630*********************************************************************************************************************************/
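/*
 * Each IEM_MC_FETCH_MEM_XXX variant below maps onto iemNativeEmitMemFetchStoreDataCommon,
 * supplying the access size, the alignment mask, the fetch/zero-/sign-extend operation and
 * the TLB-miss helper to call.
 */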
5631
5632/* 8-bit segmented: */
5633#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
5634 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
5635 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5636 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5637
5638#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5639 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5640 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5641 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5642
5643#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5644 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5645 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5646 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5647
5648#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5649 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5650 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5651 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5652
5653#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5654 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5655 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5656 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5657
5658#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5659 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5660 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5661 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5662
5663#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5664 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5665 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5666 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5667
5668/* 16-bit segmented: */
5669#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5670 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5671 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5672 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5673
5674#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5675 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5676 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5677 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5678
5679#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5680 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5681 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5682 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5683
5684#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5685 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5686 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5687 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5688
5689#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5690 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5691 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5692 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5693
5694#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5695 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5696 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5697 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5698
5699
5700/* 32-bit segmented: */
5701#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5702 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5703 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5704 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5705
5706#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5707 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5708 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5709 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5710
5711#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5712 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5713 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5714 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5715
5716#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5717 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5718 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5719 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5720
5721AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
5722#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
5723 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
5724 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
5725 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5726
5727
5728/* 64-bit segmented: */
5729#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5730 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5731 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5732 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5733
5734AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
5735#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
5736 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
5737 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
5738 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5739
5740
5741/* 8-bit flat: */
5742#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
5743 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
5744 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5745 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5746
5747#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
5748 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5749 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5750 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5751
5752#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
5753 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5754 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5755 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5756
5757#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
5758 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5759 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5760 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5761
5762#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
5763 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5764 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5765 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5766
5767#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
5768 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5769 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5770 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5771
5772#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
5773 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5774 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5775 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5776
5777
5778/* 16-bit flat: */
5779#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
5780 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5781 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5782 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5783
5784#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
5785 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5786 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5787 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5788
5789#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
5790 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5791 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5792 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5793
5794#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
5795 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5796 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5797 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5798
5799#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
5800 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5801 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5802 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5803
5804#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
5805 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5806 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5807 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5808
5809/* 32-bit flat: */
5810#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
5811 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5812 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5813 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5814
5815#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
5816 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5817 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5818 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5819
5820#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
5821 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5822 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5823 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5824
5825#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
5826 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5827 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5828 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5829
5830#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
5831 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
5832 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
5833 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5834
5835
5836/* 64-bit flat: */
5837#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
5838 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5839 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5840 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
5841
5842#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
5843 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
5844 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
5845 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
5846
5847#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5848/* 128-bit segmented: */
5849#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
5850 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
5851 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5852 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
5853
5854#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
5855 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
5856 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5857 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
5858
5859AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
5860#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
5861 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, \
5862 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
5863 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
5864
5865#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
5866 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
5867 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5868 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
5869
5870/* 128-bit flat: */
5871#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
5872 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
5873 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5874 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
5875
5876#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
5877 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
5878 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5879 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
5880
5881#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
5882 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
5883 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
5884 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
5885
5886#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
5887 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
5888 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5889 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
5890
5891/* 256-bit segmented: */
5892#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
5893 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
5894 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5895 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
5896
5897#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
5898 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
5899 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5900 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
5901
5902#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
5903 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
5904 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5905 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
5906
5907
5908/* 256-bit flat: */
5909#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
5910 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
5911 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5912 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
5913
5914#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
5915 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
5916 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5917 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
5918
5919#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
5920 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
5921 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5922 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
5923#endif
5924
5925
5926/*********************************************************************************************************************************
5927* Memory stores (IEM_MEM_STORE_XXX). *
5928*********************************************************************************************************************************/
5929
5930#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
5931 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
5932 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
5933 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
5934
5935#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
5936 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
5937 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
5938 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
5939
5940#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
5941 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
5942 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
5943 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
5944
5945#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
5946 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
5947 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
5948 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
5949
5950
5951#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
5952 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
5953 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
5954 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
5955
5956#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
5957 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
5958 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
5959 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
5960
5961#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
5962 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
5963 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
5964 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
5965
5966#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
5967 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
5968 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
5969 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
5970
5971
5972#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
5973 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
5974 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
5975
5976#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
5977 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
5978 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
5979
5980#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
5981 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
5982 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
5983
5984#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
5985 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
5986 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
5987
5988
5989#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
5990 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
5991 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
5992
5993#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
5994 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
5995 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
5996
5997#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
5998 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
5999 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
6000
6001#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
6002 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
6003 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
6004
6005/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
6006 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
6007DECL_INLINE_THROW(uint32_t)
6008iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
6009 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
6010{
6011 /*
6012 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
6013 * to do the grunt work.
6014 */
6015 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
6016 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
6017 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
6018 pfnFunction, idxInstr);
6019 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
6020 return off;
6021}
6022
6023
6024#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6025# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
6026 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
6027 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6028 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
6029
6030# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
6031 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
6032 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6033 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
6034
6035# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
6036 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
6037 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6038 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
6039
6040# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
6041 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
6042 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6043 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
6044
6045
6046# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
6047 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
6048 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6049 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
6050
6051# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
6052 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
6053 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6054 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
6055
6056# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
6057 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
6058 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6059 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
6060
6061# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
6062 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
6063 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6064 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
6065#endif
6066
6067
6068
6069/*********************************************************************************************************************************
6070* Stack Accesses. *
6071*********************************************************************************************************************************/
6072/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
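/* Byte 1 is the width of the pushed value in bits, byte 2 the flat stack-pointer width in
   bits (0 for segmented stacks) and byte 3 is non-zero when pushing a segment register;
   see the RT_BYTE1/2/3 decoding in iemNativeEmitStackPush below. */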
6073#define IEM_MC_PUSH_U16(a_u16Value) \
6074 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
6075 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
6076#define IEM_MC_PUSH_U32(a_u32Value) \
6077 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
6078 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
6079#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
6080 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
6081 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
6082#define IEM_MC_PUSH_U64(a_u64Value) \
6083 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
6084 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
6085
6086#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
6087 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
6088 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
6089#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
6090 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
6091 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
6092#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
6093 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
6094 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
6095
6096#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
6097 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
6098 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
6099#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
6100 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
6101 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
6102
6103
6104DECL_FORCE_INLINE_THROW(uint32_t)
6105iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6106{
6107 /* Use16BitSp: */
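    /* With a 16-bit stack segment only SP, i.e. the low 16 bits of RSP, is decremented and
       the effective address is the zero-extended 16-bit result. */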
6108#ifdef RT_ARCH_AMD64
6109 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
6110 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6111#else
6112 /* sub regeff, regrsp, #cbMem */
6113 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
6114 /* and regeff, regeff, #0xffff */
6115 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6116 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
6117    /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp into idxRegRsp bits 15:0. */
6118 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
6119#endif
6120 return off;
6121}
6122
6123
6124DECL_FORCE_INLINE(uint32_t)
6125iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6126{
6127 /* Use32BitSp: */
6128 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
6129 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6130 return off;
6131}
6132
6133
6134/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
6135DECL_INLINE_THROW(uint32_t)
6136iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
6137 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
6138{
6139 /*
6140 * Assert sanity.
6141 */
6142 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
6143 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
6144#ifdef VBOX_STRICT
6145 if (RT_BYTE2(cBitsVarAndFlat) != 0)
6146 {
6147 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6148 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6149 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6150 Assert( pfnFunction
6151 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
6152 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
6153 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
6154 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
6155 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
6156 : UINT64_C(0xc000b000a0009000) ));
6157 }
6158 else
6159 Assert( pfnFunction
6160 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
6161 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
6162 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
6163 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
6164 : UINT64_C(0xc000b000a0009000) ));
6165#endif
6166
6167#ifdef VBOX_STRICT
6168 /*
6169 * Check that the fExec flags we've got make sense.
6170 */
6171 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6172#endif
6173
6174 /*
6175 * To keep things simple we have to commit any pending writes first as we
6176 * may end up making calls.
6177 */
6178 /** @todo we could postpone this till we make the call and reload the
6179 * registers after returning from the call. Not sure if that's sensible or
6180 * not, though. */
6181 off = iemNativeRegFlushPendingWrites(pReNative, off);
6182
6183 /*
6184 * First we calculate the new RSP and the effective stack pointer value.
6185 * For 64-bit mode and flat 32-bit these two are the same.
6186     * (Code structure is very similar to that of POP.)
6187 */
6188 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
6189 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
6190 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
6191 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
6192 ? cbMem : sizeof(uint16_t);
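    /* Note: Intel CPUs write only 16 bits when pushing a segment register outside 16-bit mode,
             hence the narrower memory access; RSP is still adjusted by the full operand size (cbMem). */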
6193 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
6194 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
6195 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
6196 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
6197 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
6198 if (cBitsFlat != 0)
6199 {
6200 Assert(idxRegEffSp == idxRegRsp);
6201 Assert(cBitsFlat == 32 || cBitsFlat == 64);
6202 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
6203 if (cBitsFlat == 64)
6204 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
6205 else
6206 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
6207 }
6208 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
6209 {
6210 Assert(idxRegEffSp != idxRegRsp);
6211 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
6212 kIemNativeGstRegUse_ReadOnly);
6213#ifdef RT_ARCH_AMD64
6214 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6215#else
6216 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6217#endif
6218 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
6219 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
6220 offFixupJumpToUseOtherBitSp = off;
6221 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6222 {
6223 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
6224 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6225 }
6226 else
6227 {
6228 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
6229 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6230 }
6231 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6232 }
6233 /* SpUpdateEnd: */
6234 uint32_t const offLabelSpUpdateEnd = off;
6235
6236 /*
6237 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
6238 * we're skipping lookup).
6239 */
6240 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
6241 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
6242 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6243 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
6244 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6245 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6246 : UINT32_MAX;
6247 uint8_t const idxRegValue = !TlbState.fSkip
6248 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6249 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
6250 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
6251 : UINT8_MAX;
6252 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
6253
6254
6255 if (!TlbState.fSkip)
6256 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6257 else
6258 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
6259
6260 /*
6261 * Use16BitSp:
6262 */
6263 if (cBitsFlat == 0)
6264 {
6265#ifdef RT_ARCH_AMD64
6266 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6267#else
6268 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6269#endif
6270 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
6271 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6272 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6273 else
6274 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6275 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
6276 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6277 }
6278
6279 /*
6280 * TlbMiss:
6281 *
6282 * Call helper to do the pushing.
6283 */
6284 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
6285
6286#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6287 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6288#else
6289 RT_NOREF(idxInstr);
6290#endif
6291
6292 /* Save variables in volatile registers. */
6293 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6294 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
6295 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
6296 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
6297 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6298
6299 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
6300 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
6301 {
6302 /* Swap them using ARG0 as temp register: */
6303 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
6304 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
6305 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
6306 }
6307 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
6308 {
6309 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
6310 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
6311 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6312
6313 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
6314 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
6315 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6316 }
6317 else
6318 {
6319 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
6320 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6321
6322 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
6323 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
6324 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
6325 }
6326
6327 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6328 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6329
6330 /* Done setting up parameters, make the call. */
6331 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6332
6333 /* Restore variables and guest shadow registers to volatile registers. */
6334 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6335 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6336
6337#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6338 if (!TlbState.fSkip)
6339 {
6340 /* end of TlbMiss - Jump to the done label. */
6341 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6342 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6343
6344 /*
6345 * TlbLookup:
6346 */
6347 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
6348 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6349
6350 /*
6351 * Emit code to do the actual storing / fetching.
6352 */
6353 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
6354# ifdef VBOX_WITH_STATISTICS
6355 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6356 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
6357# endif
6358 if (idxRegValue != UINT8_MAX)
6359 {
6360 switch (cbMemAccess)
6361 {
6362 case 2:
6363 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6364 break;
6365 case 4:
6366 if (!fIsIntelSeg)
6367 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6368 else
6369 {
6370                        /* Intel real mode segment push: the 10890XE adds the 2nd half of EFLAGS to a
6371                           PUSH FS in real mode, so we have to try to emulate that here.
6372 We borrow the now unused idxReg1 from the TLB lookup code here. */
6373 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
6374 kIemNativeGstReg_EFlags);
6375 if (idxRegEfl != UINT8_MAX)
6376 {
6377#ifdef RT_ARCH_AMD64
6378 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
6379 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
6380 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6381#else
6382 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
6383 off, TlbState.idxReg1, idxRegEfl,
6384 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6385#endif
6386 iemNativeRegFreeTmp(pReNative, idxRegEfl);
6387 }
6388 else
6389 {
6390 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
6391 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
6392 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
6393 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6394 }
6395 /* ASSUMES the upper half of idxRegValue is ZERO. */
6396 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
6397 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
6398 }
6399 break;
6400 case 8:
6401 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6402 break;
6403 default:
6404 AssertFailed();
6405 }
6406 }
6407 else
6408 {
6409 switch (cbMemAccess)
6410 {
6411 case 2:
6412 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
6413 idxRegMemResult, TlbState.idxReg1);
6414 break;
6415 case 4:
6416 Assert(!fIsSegReg);
6417 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
6418 idxRegMemResult, TlbState.idxReg1);
6419 break;
6420 case 8:
6421 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
6422 break;
6423 default:
6424 AssertFailed();
6425 }
6426 }
6427
6428 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6429 TlbState.freeRegsAndReleaseVars(pReNative);
6430
6431 /*
6432 * TlbDone:
6433 *
6434 * Commit the new RSP value.
6435 */
6436 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6437 }
6438#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
6439
6440#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6441 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
6442#endif
6443 iemNativeRegFreeTmp(pReNative, idxRegRsp);
6444 if (idxRegEffSp != idxRegRsp)
6445 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
6446
6447    /* The value variable is implicitly flushed. */
6448 if (idxRegValue != UINT8_MAX)
6449 iemNativeVarRegisterRelease(pReNative, idxVarValue);
6450 iemNativeVarFreeLocal(pReNative, idxVarValue);
6451
6452 return off;
6453}
6454
6455
6456
6457/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
6458#define IEM_MC_POP_GREG_U16(a_iGReg) \
6459 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
6460 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
6461#define IEM_MC_POP_GREG_U32(a_iGReg) \
6462 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
6463 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
6464#define IEM_MC_POP_GREG_U64(a_iGReg) \
6465 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
6466 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
6467
6468#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
6469 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
6470 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
6471#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
6472 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
6473 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
6474
6475#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
6476 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
6477 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
6478#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
6479 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
6480 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
6481
6482
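/** Emits the 16-bit SP variant of the stack pointer update for POP: loads the
 *  effective stack pointer from the low 16 bits of RSP and adds cbMem to RSP
 *  bits 15:0 only (idxRegTmp is used as scratch on ARM64). */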
6483DECL_FORCE_INLINE_THROW(uint32_t)
6484iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
6485 uint8_t idxRegTmp)
6486{
6487 /* Use16BitSp: */
6488#ifdef RT_ARCH_AMD64
6489 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6490 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
6491 RT_NOREF(idxRegTmp);
6492#else
6493 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
6494 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
6495 /* add tmp, regrsp, #cbMem */
6496 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
6497 /* and tmp, tmp, #0xffff */
6498 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6499 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
6500    /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
6501 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
6502#endif
6503 return off;
6504}
6505
6506
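/** Emits the 32-bit ESP variant of the stack pointer update for POP: copies ESP
 *  to the effective stack pointer register and adds cbMem to the 32-bit RSP. */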
6507DECL_FORCE_INLINE(uint32_t)
6508iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6509{
6510 /* Use32BitSp: */
6511 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6512 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
6513 return off;
6514}
6515
6516
6517/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
6518DECL_INLINE_THROW(uint32_t)
6519iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
6520 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
6521{
6522 /*
6523 * Assert sanity.
6524 */
6525 Assert(idxGReg < 16);
6526#ifdef VBOX_STRICT
6527 if (RT_BYTE2(cBitsVarAndFlat) != 0)
6528 {
6529 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6530 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6531 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6532 Assert( pfnFunction
6533 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
6534 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
6535 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
6536 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
6537 : UINT64_C(0xc000b000a0009000) ));
6538 }
6539 else
6540 Assert( pfnFunction
6541 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
6542 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
6543 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
6544 : UINT64_C(0xc000b000a0009000) ));
6545#endif
6546
6547#ifdef VBOX_STRICT
6548 /*
6549 * Check that the fExec flags we've got make sense.
6550 */
6551 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6552#endif
6553
6554 /*
6555 * To keep things simple we have to commit any pending writes first as we
6556 * may end up making calls.
6557 */
6558 off = iemNativeRegFlushPendingWrites(pReNative, off);
6559
6560 /*
6561     * Determine the effective stack pointer; for non-FLAT modes we also update RSP here.
6562 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
6563 * directly as the effective stack pointer.
6564 * (Code structure is very similar to that of PUSH)
6565 */
6566 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
6567 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
6568 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
6569 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
6570 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
6571 /** @todo can do a better job picking the register here. For cbMem >= 4 this
6572 * will be the resulting register value. */
6573 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
6574
6575 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
6576 if (cBitsFlat != 0)
6577 {
6578 Assert(idxRegEffSp == idxRegRsp);
6579 Assert(cBitsFlat == 32 || cBitsFlat == 64);
6580 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
6581 }
6582 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
6583 {
6584 Assert(idxRegEffSp != idxRegRsp);
6585 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
6586 kIemNativeGstRegUse_ReadOnly);
6587#ifdef RT_ARCH_AMD64
6588 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6589#else
6590 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6591#endif
6592 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
6593 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
6594 offFixupJumpToUseOtherBitSp = off;
6595 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6596 {
6597/** @todo can skip idxRegRsp updating when popping ESP. */
6598 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
6599 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6600 }
6601 else
6602 {
6603 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
6604 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
6605 }
6606 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6607 }
6608 /* SpUpdateEnd: */
6609 uint32_t const offLabelSpUpdateEnd = off;
6610
6611 /*
6612     * Okay, now prepare for the TLB lookup and jump to the TlbLookup code (or
6613     * to TlbMiss if we're skipping the lookup).
6614 */
6615 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
6616 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
6617 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6618 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
6619 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6620 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6621 : UINT32_MAX;
6622
6623 if (!TlbState.fSkip)
6624 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6625 else
6626 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
6627
6628 /*
6629 * Use16BitSp:
6630 */
6631 if (cBitsFlat == 0)
6632 {
6633#ifdef RT_ARCH_AMD64
6634 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6635#else
6636 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6637#endif
6638 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
6639 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6640 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
6641 else
6642 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6643 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
6644 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6645 }
6646
6647 /*
6648 * TlbMiss:
6649 *
6650     * Call helper to do the popping.
6651 */
6652 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
6653
6654#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6655 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6656#else
6657 RT_NOREF(idxInstr);
6658#endif
6659
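    /* Save variables in volatile registers (same as in the PUSH path above). */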
6660 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6661 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
6662 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
6663 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6664
6665
6666 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
6667 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
6668 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6669
6670 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6671 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6672
6673 /* Done setting up parameters, make the call. */
6674 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6675
6676 /* Move the return register content to idxRegMemResult. */
6677 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
6678 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
6679
6680 /* Restore variables and guest shadow registers to volatile registers. */
6681 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6682 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6683
6684#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6685 if (!TlbState.fSkip)
6686 {
6687 /* end of TlbMiss - Jump to the done label. */
6688 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6689 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6690
6691 /*
6692 * TlbLookup:
6693 */
6694 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
6695 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6696
6697 /*
6698     * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
6699 */
6700 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6701# ifdef VBOX_WITH_STATISTICS
6702 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6703 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
6704# endif
6705 switch (cbMem)
6706 {
6707 case 2:
6708 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6709 break;
6710 case 4:
6711 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6712 break;
6713 case 8:
6714 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6715 break;
6716 default:
6717 AssertFailed();
6718 }
6719
6720 TlbState.freeRegsAndReleaseVars(pReNative);
6721
6722 /*
6723 * TlbDone:
6724 *
6725     * Set the new RSP value (FLAT accesses need to calculate it first) and
6726 * commit the popped register value.
6727 */
6728 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6729 }
6730#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
6731
6732 if (idxGReg != X86_GREG_xSP)
6733 {
6734 /* Set the register. */
6735 if (cbMem >= sizeof(uint32_t))
6736 {
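            /* For 32-bit and 64-bit pops the (zero extended) result register can
               directly become the new shadow copy of the guest register, whereas
               the 16-bit case below has to be merged into the existing value. */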
6737#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6738 AssertMsg( pReNative->idxCurCall == 0
6739 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
6740 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
6741#endif
6742 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
6743#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6744 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
6745#endif
6746#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6747 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
6748 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6749#endif
6750 }
6751 else
6752 {
6753 Assert(cbMem == sizeof(uint16_t));
6754 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
6755 kIemNativeGstRegUse_ForUpdate);
6756 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
6757#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6758 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6759#endif
6760 iemNativeRegFreeTmp(pReNative, idxRegDst);
6761 }
6762
6763 /* Complete RSP calculation for FLAT mode. */
6764 if (idxRegEffSp == idxRegRsp)
6765 {
6766 if (cBitsFlat == 64)
6767 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
6768 else
6769 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
6770 }
6771 }
6772 else
6773 {
6774        /* We're popping RSP, ESP or SP. Only SP needs a bit of extra work, of course. */
6775 if (cbMem == sizeof(uint64_t))
6776 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
6777 else if (cbMem == sizeof(uint32_t))
6778 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
6779 else
6780 {
6781 if (idxRegEffSp == idxRegRsp)
6782 {
6783 if (cBitsFlat == 64)
6784 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
6785 else
6786 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
6787 }
6788 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
6789 }
6790 }
6791
6792#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6793 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
6794#endif
6795
6796 iemNativeRegFreeTmp(pReNative, idxRegRsp);
6797 if (idxRegEffSp != idxRegRsp)
6798 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
6799 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6800
6801 return off;
6802}
6803
6804
6805
6806/*********************************************************************************************************************************
6807* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
6808*********************************************************************************************************************************/
6809
6810#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6811 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6812 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
6813 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
6814
6815#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6816 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6817 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
6818 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
6819
6820#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6821 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6822 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
6823 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
6824
6825#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6826 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6827 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
6828 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
6829
6830
6831#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6832 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6833 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6834 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
6835
6836#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6837 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6838 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6839 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
6840
6841#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6842 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6843 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6844 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
6845
6846#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6847 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6848 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6849 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
6850
6851#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6852 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
6853 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6854 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
6855
6856
6857#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6858 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6859 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6860 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
6861
6862#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6863 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6864 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6865 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
6866
6867#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6868 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6869 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6870 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
6871
6872#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6873 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6874 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6875 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
6876
6877#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6878 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
6879 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6880 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
6881
6882
6883#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6884 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6885 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6886 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
6887
6888#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6889 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6890 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6891 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
6892#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6893 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6894 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6895 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
6896
6897#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6898 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6899 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6900 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
6901
6902#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6903 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
6904 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6905 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
6906
6907
6908#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6909 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
6910 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6911 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
6912
6913#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6914 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
6915 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
6916 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
6917
6918
6919#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6920 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6921 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6922 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
6923
6924#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6925 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6926 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6927 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
6928
6929#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6930 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6931 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6932 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
6933
6934#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6935 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6936 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6937 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
6938
6939
6940
6941#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6942 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6943 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
6944 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
6945
6946#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6947 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6948 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
6949 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
6950
6951#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6952 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6953 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
6954 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
6955
6956#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6957 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6958 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
6959 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
6960
6961
6962#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
6963 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6964 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6965 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
6966
6967#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
6968 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6969 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6970 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
6971
6972#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
6973 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6974 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6975 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
6976
6977#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
6978 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6979 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6980 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
6981
6982#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
6983 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
6984 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6985 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
6986
6987
6988#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
6989 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6990 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6991 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
6992
6993#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
6994 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6995 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6996 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
6997
6998#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
6999 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7000 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7001 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
7002
7003#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7004 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7005 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7006 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
7007
7008#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
7009 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
7010 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7011 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
7012
7013
7014#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7015 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7016 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7017 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
7018
7019#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7020 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7021 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7022 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
7023
7024#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7025 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7026 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7027 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
7028
7029#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7030 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7031 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7032 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
7033
7034#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
7035 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
7036 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7037 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
7038
7039
7040#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
7041 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
7042 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7043 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
7044
7045#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
7046 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
7047 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
7048 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
7049
7050
7051#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7052 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7053 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7054 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
7055
7056#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7057 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7058 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7059 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
7060
7061#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7062 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7063 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7064 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
7065
7066#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7067 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7068 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7069 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
7070
7071
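/** Common emitter worker for the IEM_MC_MEM_MAP_XXX and IEM_MC_MEM_FLAT_MAP_XXX
 *  statements above; iSegReg is UINT8_MAX for the flat variants. */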
7072DECL_INLINE_THROW(uint32_t)
7073iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
7074 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
7075 uintptr_t pfnFunction, uint8_t idxInstr)
7076{
7077 /*
7078 * Assert sanity.
7079 */
7080 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
7081 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
7082 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
7083 && pVarMem->cbVar == sizeof(void *),
7084 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7085
7086 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
7087 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
7088 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
7089 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
7090 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7091
7092 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7093 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7094 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7095 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7096 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7097
7098 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
7099
7100 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7101
7102#ifdef VBOX_STRICT
7103# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
7104 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
7105 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
7106 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
7107 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
7108# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
7109 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
7110 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
7111 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
7112
7113 if (iSegReg == UINT8_MAX)
7114 {
7115 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7116 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7117 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7118 switch (cbMem)
7119 {
7120 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
7121 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
7122 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
7123 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
7124 case 10:
7125 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
7126 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
7127 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
7128 break;
7129 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
7130# if 0
7131 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
7132 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
7133# endif
7134 default: AssertFailed(); break;
7135 }
7136 }
7137 else
7138 {
7139 Assert(iSegReg < 6);
7140 switch (cbMem)
7141 {
7142 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
7143 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
7144 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
7145 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
7146 case 10:
7147 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
7148 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
7149 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
7150 break;
7151 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
7152# if 0
7153 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
7154 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
7155# endif
7156 default: AssertFailed(); break;
7157 }
7158 }
7159# undef IEM_MAP_HLP_FN
7160# undef IEM_MAP_HLP_FN_NO_AT
7161#endif
7162
7163#ifdef VBOX_STRICT
7164 /*
7165 * Check that the fExec flags we've got make sense.
7166 */
7167 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7168#endif
7169
7170 /*
7171 * To keep things simple we have to commit any pending writes first as we
7172 * may end up making calls.
7173 */
7174 off = iemNativeRegFlushPendingWrites(pReNative, off);
7175
7176#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7177 /*
7178 * Move/spill/flush stuff out of call-volatile registers.
7179 * This is the easy way out. We could contain this to the tlb-miss branch
7180 * by saving and restoring active stuff here.
7181 */
7182 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
7183 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7184#endif
7185
7186 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
7187       while the tlb-miss code path will temporarily put it on the stack.
7188       Set the type to stack here so we don't need to do it twice below. */
7189 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
7190 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
7191 /** @todo use a tmp register from TlbState, since they'll be free after tlb
7192 * lookup is done. */
7193
7194 /*
7195 * Define labels and allocate the result register (trying for the return
7196 * register if we can).
7197 */
7198 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7199 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7200 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
7201 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
7202 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
7203 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7204 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7205 : UINT32_MAX;
7206//off=iemNativeEmitBrk(pReNative, off, 0);
7207 /*
7208 * Jump to the TLB lookup code.
7209 */
7210 if (!TlbState.fSkip)
7211 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7212
7213 /*
7214 * TlbMiss:
7215 *
7216 * Call helper to do the fetching.
7217 * We flush all guest register shadow copies here.
7218 */
7219 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7220
7221#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7222 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7223#else
7224 RT_NOREF(idxInstr);
7225#endif
7226
7227#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7228 /* Save variables in volatile registers. */
7229 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
7230 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7231#endif
7232
7233 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
7234    off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*offAddend*/,
7235#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7236 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7237#else
7238 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7239#endif
7240
7241 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
7242 if (iSegReg != UINT8_MAX)
7243 {
7244 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7245 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
7246 }
7247
7248    /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stack slot address, the result is loaded into a register after the call. */
7249 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
7250 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
7251
7252 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7253 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7254
7255 /* Done setting up parameters, make the call. */
7256 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7257
7258 /*
7259 * Put the output in the right registers.
7260 */
7261 Assert(idxRegMemResult == pVarMem->idxReg);
7262 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
7263 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
7264
7265#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7266 /* Restore variables and guest shadow registers to volatile registers. */
7267 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7268 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7269#endif
7270
7271 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
7272 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
7273
7274#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7275 if (!TlbState.fSkip)
7276 {
7277        /* end of TlbMiss - Jump to the done label. */
7278 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7279 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7280
7281 /*
7282 * TlbLookup:
7283 */
7284 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
7285 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7286# ifdef VBOX_WITH_STATISTICS
7287 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
7288 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
7289# endif
7290
7291 /* [idxVarUnmapInfo] = 0; */
7292 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
7293
7294 /*
7295 * TlbDone:
7296 */
7297 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7298
7299 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7300
7301# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7302 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7303 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7304# endif
7305 }
7306#else
7307 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
7308#endif
7309
7310 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
7311 iemNativeVarRegisterRelease(pReNative, idxVarMem);
7312
7313 return off;
7314}
7315
7316
7317#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
7318 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
7319 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
7320
7321#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
7322 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
7323 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
7324
7325#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
7326 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
7327 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
7328
7329#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
7330 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
7331 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
7332
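/** Common emitter worker for the IEM_MC_MEM_COMMIT_AND_UNMAP_XXX statements above. */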
7333DECL_INLINE_THROW(uint32_t)
7334iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
7335 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
7336{
7337 /*
7338 * Assert sanity.
7339 */
7340 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
7341#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
7342 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
7343#endif
7344 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
7345 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
7346 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
7347#ifdef VBOX_STRICT
7348 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
7349 {
7350 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
7351 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
7352 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
7353 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
7354 case IEM_ACCESS_TYPE_WRITE:
7355 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
7356 case IEM_ACCESS_TYPE_READ:
7357 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
7358 default: AssertFailed();
7359 }
7360#else
7361 RT_NOREF(fAccess);
7362#endif
7363
7364 /*
7365 * To keep things simple we have to commit any pending writes first as we
7366 * may end up making calls (there shouldn't be any at this point, so this
7367 * is just for consistency).
7368 */
7369 /** @todo we could postpone this till we make the call and reload the
7370 * registers after returning from the call. Not sure if that's sensible or
7371 * not, though. */
7372 off = iemNativeRegFlushPendingWrites(pReNative, off);
7373
7374 /*
7375 * Move/spill/flush stuff out of call-volatile registers.
7376 *
7377 * We exclude any register holding the bUnmapInfo variable, as we'll be
7378 * checking it after returning from the call and will free it afterwards.
7379 */
7380 /** @todo save+restore active registers and maybe guest shadows in miss
7381 * scenario. */
7382 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
7383 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
7384
7385 /*
7386 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
7387 * to call the unmap helper function.
7388 *
7389     * The likelihood of it being zero is higher than for the TLB hit when doing
7390     * the mapping, as a TLB miss for a well aligned and unproblematic memory
7391 * access should also end up with a mapping that won't need special unmapping.
7392 */
7393 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
7394 * should speed up things for the pure interpreter as well when TLBs
7395 * are enabled. */
7396#ifdef RT_ARCH_AMD64
7397 if (pVarUnmapInfo->idxReg == UINT8_MAX)
7398 {
7399 /* test byte [rbp - xxx], 0ffh */
7400 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7401 pbCodeBuf[off++] = 0xf6;
7402 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
7403 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
7404 pbCodeBuf[off++] = 0xff;
7405 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7406 }
7407 else
7408#endif
7409 {
7410 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
7411 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
7412 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
7413 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
7414 }
7415 uint32_t const offJmpFixup = off;
7416 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
7417
7418 /*
7419 * Call the unmap helper function.
7420 */
7421#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
7422 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7423#else
7424 RT_NOREF(idxInstr);
7425#endif
7426
7427 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
7428 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
7429 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7430
7431 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7432 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7433
7434 /* Done setting up parameters, make the call. */
7435 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7436
7437    /* The bUnmapInfo variable is implicitly freed by these MCs. */
7438 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
7439
7440 /*
7441 * Done, just fixup the jump for the non-call case.
7442 */
7443 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
7444
7445 return off;
7446}
7447
7448
7449
7450/*********************************************************************************************************************************
7451* State and Exceptions *
7452*********************************************************************************************************************************/
7453
7454#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7455#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7456
7457#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7458#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7459#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7460
7461#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7462#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7463#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7464
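/* Note: All of the FPU/SSE/AVX state actualization macros above currently map to the same helper below, which does nothing yet (see the @todo in it). */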
7465
7466DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
7467{
7468 /** @todo this needs a lot more work later. */
7469 RT_NOREF(pReNative, fForChange);
7470 return off;
7471}
7472
7473
7474
7475/*********************************************************************************************************************************
7476* Emitters for FPU related operations. *
7477*********************************************************************************************************************************/
7478
7479#define IEM_MC_FETCH_FCW(a_u16Fcw) \
7480 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
7481
7482/** Emits code for IEM_MC_FETCH_FCW. */
7483DECL_INLINE_THROW(uint32_t)
7484iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
7485{
7486 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7487 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7488
7489 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7490
7491 /* Allocate a temporary FCW register. */
7492 /** @todo eliminate extra register */
7493 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
7494 kIemNativeGstRegUse_ReadOnly);
7495
7496 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
7497
7498 /* Free but don't flush the FCW register. */
7499 iemNativeRegFreeTmp(pReNative, idxFcwReg);
7500 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7501
7502 return off;
7503}
7504
7505
7506#define IEM_MC_FETCH_FSW(a_u16Fsw) \
7507 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
7508
7509/** Emits code for IEM_MC_FETCH_FSW. */
7510DECL_INLINE_THROW(uint32_t)
7511iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
7512{
7513 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7514 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7515
7516 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
7517 /* Allocate a temporary FSW register. */
7518 /** @todo eliminate extra register */
7519 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
7520 kIemNativeGstRegUse_ReadOnly);
7521
7522 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
7523
7524 /* Free but don't flush the FSW register. */
7525 iemNativeRegFreeTmp(pReNative, idxFswReg);
7526 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7527
7528 return off;
7529}
7530
7531
7532
7533#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7534
7535
7536/*********************************************************************************************************************************
7537* Emitters for SSE/AVX specific operations. *
7538*********************************************************************************************************************************/
7539
7540#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
7541 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
7542
7543/** Emits code for IEM_MC_COPY_XREG_U128. */
7544DECL_INLINE_THROW(uint32_t)
7545iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
7546{
7547    /* This is a nop if the source and destination registers are the same. */
7548 if (iXRegDst != iXRegSrc)
7549 {
7550 /* Allocate destination and source register. */
7551 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
7552 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
7553 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
7554 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7555
7556 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
7557
7558 /* Free but don't flush the source and destination register. */
7559 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7560 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7561 }
7562
7563 return off;
7564}
7565
7566
7567#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
7568 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
7569
7570/** Emits code for IEM_MC_FETCH_XREG_U128. */
7571DECL_INLINE_THROW(uint32_t)
7572iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
7573{
7574 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7575 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
7576
7577 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7578 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7579
7580 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
7581
7582 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
7583
7584 /* Free but don't flush the source register. */
7585 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7586 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
7587
7588 return off;
7589}
7590
7591
7592#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
7593 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
7594
7595/** Emits code for IEM_MC_FETCH_XREG_U64. */
7596DECL_INLINE_THROW(uint32_t)
7597iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
7598{
7599 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7600 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7601
7602 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7603 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7604
7605 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7606 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7607
7608 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
7609
7610 /* Free but don't flush the source register. */
7611 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7612 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7613
7614 return off;
7615}
7616
7617
7618#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
7619    off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
7620
7621/** Emits code for IEM_MC_FETCH_XREG_U32. */
7622DECL_INLINE_THROW(uint32_t)
7623iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
7624{
7625 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7626 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7627
7628 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7629 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7630
7631 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7632 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7633
7634 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
7635
7636 /* Free but don't flush the source register. */
7637 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7638 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7639
7640 return off;
7641}
7642
7643
7644#define IEM_MC_FETCH_XREG_U16(a_u16Value, a_iXReg, a_iWord) \
7645    off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u16Value, a_iXReg, a_iWord)
7646
7647/** Emits code for IEM_MC_FETCH_XREG_U16. */
7648DECL_INLINE_THROW(uint32_t)
7649iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
7650{
7651 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7652 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7653
7654 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7655 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7656
7657 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7658 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7659
7660 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
7661
7662 /* Free but don't flush the source register. */
7663 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7664 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7665
7666 return off;
7667}
7668
7669
7670#define IEM_MC_FETCH_XREG_U8(a_u8Value, a_iXReg, a_iByte) \
7671    off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u8Value, a_iXReg, a_iByte)
7672
7673/** Emits code for IEM_MC_FETCH_XREG_U8. */
7674DECL_INLINE_THROW(uint32_t)
7675iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
7676{
7677 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7678 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
7679
7680 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7681 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7682
7683 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7684 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7685
7686 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
7687
7688 /* Free but don't flush the source register. */
7689 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7690 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7691
7692 return off;
7693}
7694
7695
7696#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
7697 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
7698
7699AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7700#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
7701 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
7702
7703
7704/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
7705DECL_INLINE_THROW(uint32_t)
7706iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7707{
7708 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7709 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
7710
7711 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7712 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
7713
7714 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
7715
7716 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
7717
7718    /* Free but don't flush the destination register. */
7719 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7720 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
7721
7722 return off;
7723}
7724
7725
7726#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
7727 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
7728
7729#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
7730 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
7731
7732#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u16Value) \
7733    off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u16Value, sizeof(uint16_t), a_iWord)
7734
7735#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u8Value) \
7736    off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u8Value, sizeof(uint8_t), a_iByte)
7737
7738#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
7739 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
7740
7741#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
7742 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
7743
7744/** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8 as well as IEM_MC_STORE_XREG_R64/IEM_MC_STORE_XREG_R32. */
7745DECL_INLINE_THROW(uint32_t)
7746iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar, uint8_t cbLocal, uint8_t iElem)
7747{
7748 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7749 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
7750
7751#ifdef VBOX_STRICT
7752 switch (cbLocal)
7753 {
7754 case sizeof(uint64_t): Assert(iElem < 2); break;
7755 case sizeof(uint32_t): Assert(iElem < 4); break;
7756 case sizeof(uint16_t): Assert(iElem < 8); break;
7757 case sizeof(uint8_t): Assert(iElem < 16); break;
7758 default: AssertFailed();
7759 }
7760#endif
7761
7762 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7763 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7764
7765 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7766
7767 switch (cbLocal)
7768 {
7769 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7770 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7771 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7772 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7773 default: AssertFailed();
7774 }
7775
7776    /* Free but don't flush the destination register. */
7777 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7778 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7779
7780 return off;
7781}
7782
7783
7784#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
7785 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
7786
7787/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
7788DECL_INLINE_THROW(uint32_t)
7789iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7790{
7791 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7792 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7793
7794 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7795 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7796
7797 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7798
7799    /* Zero the vector register first, then store the 64-bit value to the lowest 64-bit element. */
7800 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
7801 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
7802
7803    /* Free but don't flush the destination register. */
7804 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7805 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7806
7807 return off;
7808}
7809
7810
7811#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
7812 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
7813
7814/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
7815DECL_INLINE_THROW(uint32_t)
7816iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7817{
7818 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7819 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7820
7821 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7822 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7823
7824 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7825
7826 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
7827 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
7828 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
7829
7830    /* Free but don't flush the destination register. */
7831 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7832 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7833
7834 return off;
7835}
7836
7837
7838#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
7839 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
7840
7841/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
7842DECL_INLINE_THROW(uint32_t)
7843iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst, uint8_t idxSrcVar, uint8_t iDwSrc)
7844{
7845 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7846 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
7847
7848 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7849 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7850
7851 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
7852
7853 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
7854 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
7855
7856 /* Free but don't flush the destination register. */
7857 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7858 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
7859
7860 return off;
7861}
7862
7863
7864#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
7865 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
7866
7867/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
7868DECL_INLINE_THROW(uint32_t)
7869iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
7870{
7871    /*
7872     * The iYRegSrc == iYRegDst case needs to be treated differently here, because if iYRegDst gets allocated first for the full write
7873     * it won't load the actual value from CPUMCTX.  When allocating iYRegSrc afterwards it would get duplicated from the already
7874     * allocated host register for iYRegDst containing garbage.  This would be caught by the guest register value checking in debug builds.
7875     */
7876 if (iYRegDst != iYRegSrc)
7877 {
7878 /* Allocate destination and source register. */
7879 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
7880 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7881 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
7882 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7883
7884 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
7885 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7886
7887 /* Free but don't flush the source and destination register. */
7888 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7889 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7890 }
7891 else
7892 {
7893 /* This effectively only clears the upper 128-bits of the register. */
7894 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
7895 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
7896
7897 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
7898
7899 /* Free but don't flush the destination register. */
7900 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
7901 }
7902
7903 return off;
7904}
7905
7906
7907#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
7908 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
7909
7910/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
7911DECL_INLINE_THROW(uint32_t)
7912iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
7913{
7914    /*
7915     * The iYRegSrc == iYRegDst case needs to be treated differently here, because if iYRegDst gets allocated first for the full write
7916     * it won't load the actual value from CPUMCTX.  When allocating iYRegSrc afterwards it would get duplicated from the already
7917     * allocated host register for iYRegDst containing garbage.  This would be caught by the guest register value checking in debug builds.
7918     * Besides, iYRegSrc == iYRegDst would only clear the upper 256 bits of a ZMM register, which we don't support yet, so this is just a nop.
7919     */
7920 if (iYRegDst != iYRegSrc)
7921 {
7922 /* Allocate destination and source register. */
7923 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
7924 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
7925 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
7926 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7927
7928 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
7929
7930 /* Free but don't flush the source and destination register. */
7931 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7932 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7933 }
7934
7935 return off;
7936}
7937
7938
7939#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
7940 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
7941
7942/** Emits code for IEM_MC_FETCH_YREG_U128. */
7943DECL_INLINE_THROW(uint32_t)
7944iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
7945{
7946 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7947 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
7948
7949 Assert(iDQWord <= 1);
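    /* Only the addressed 128-bit half of the YMM register needs loading: the high half for iDQWord == 1, otherwise the low half. */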
7950 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7951 iDQWord == 1
7952 ? kIemNativeGstSimdRegLdStSz_High128
7953 : kIemNativeGstSimdRegLdStSz_Low128,
7954 kIemNativeGstRegUse_ReadOnly);
7955
7956 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7957 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
7958
7959 if (iDQWord == 1)
7960 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
7961 else
7962 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
7963
7964 /* Free but don't flush the source register. */
7965 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7966 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
7967
7968 return off;
7969}
7970
7971
7972#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
7973 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
7974
7975/** Emits code for IEM_MC_FETCH_YREG_U64. */
7976DECL_INLINE_THROW(uint32_t)
7977iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
7978{
7979 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7980 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7981
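    /* Qwords 0 and 1 live in the low 128 bits, qwords 2 and 3 in the high 128 bits; only that half needs loading. */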
7982 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7983 iQWord >= 2
7984 ? kIemNativeGstSimdRegLdStSz_High128
7985 : kIemNativeGstSimdRegLdStSz_Low128,
7986 kIemNativeGstRegUse_ReadOnly);
7987
7988 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7989 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7990
7991 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
7992
7993 /* Free but don't flush the source register. */
7994 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7995 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7996
7997 return off;
7998}
7999
8000
8001#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
8002 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
8003
8004/** Emits code for IEM_MC_FETCH_YREG_U32. */
8005DECL_INLINE_THROW(uint32_t)
8006iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
8007{
8008 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8009 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
8010
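    /* Dwords 0 thru 3 live in the low 128 bits, dwords 4 thru 7 in the high 128 bits; only that half needs loading. */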
8011 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8012 iDWord >= 4
8013 ? kIemNativeGstSimdRegLdStSz_High128
8014 : kIemNativeGstSimdRegLdStSz_Low128,
8015 kIemNativeGstRegUse_ReadOnly);
8016
8017 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8018 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8019
8020 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
8021
8022 /* Free but don't flush the source register. */
8023 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8024 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8025
8026 return off;
8027}
8028
8029
8030#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
8031 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
8032
8033/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
8034DECL_INLINE_THROW(uint32_t)
8035iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
8036{
8037 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8038 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
8039
8040 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
8041
8042 /* Free but don't flush the register. */
8043 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
8044
8045 return off;
8046}
8047
8048
8049#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
8050 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
8051
8052/** Emits code for IEM_MC_STORE_YREG_U128. */
8053DECL_INLINE_THROW(uint32_t)
8054iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
8055{
8056 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8057 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8058
8059 Assert(iDQword <= 1);
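    /* Only the addressed 128-bit half is allocated for a full write: the low half for iDQword == 0, otherwise the high half. */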
8060 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8061 iDQword == 0
8062 ? kIemNativeGstSimdRegLdStSz_Low128
8063 : kIemNativeGstSimdRegLdStSz_High128,
8064 kIemNativeGstRegUse_ForFullWrite);
8065
8066 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8067
8068 if (iDQword == 0)
8069 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
8070 else
8071 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
8072
8073    /* Free but don't flush the destination register. */
8074 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8075 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8076
8077 return off;
8078}
8079
8080
8081#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
8082 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
8083
8084/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
8085DECL_INLINE_THROW(uint32_t)
8086iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8087{
8088 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8089 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8090
8091 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8092 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8093
8094 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8095
8096 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
8097 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8098
8099    /* Free but don't flush the destination register. */
8100 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8101 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8102
8103 return off;
8104}
8105
8106
8107#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
8108 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
8109
8110/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
8111DECL_INLINE_THROW(uint32_t)
8112iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8113{
8114 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8115 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
8116
8117 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8118 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8119
8120 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8121
8122 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8123 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8124
8125 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8126 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8127
8128 return off;
8129}
8130
8131
8132#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
8133 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
8134
8135/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
8136DECL_INLINE_THROW(uint32_t)
8137iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8138{
8139 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8140 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
8141
8142 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8143 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8144
8145 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8146
8147 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8148 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8149
8150    /* Free but don't flush the destination register. */
8151 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8152 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8153
8154 return off;
8155}
8156
8157
8158#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
8159 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
8160
8161/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
8162DECL_INLINE_THROW(uint32_t)
8163iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8164{
8165 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8166 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8167
8168 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8169 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8170
8171 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8172
8173 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8174 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8175
8176    /* Free but don't flush the destination register. */
8177 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8178 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8179
8180 return off;
8181}
8182
8183
8184#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
8185 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
8186
8187/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
8188DECL_INLINE_THROW(uint32_t)
8189iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8190{
8191 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8192 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8193
8194 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8195 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8196
8197 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8198
8199 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8200 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8201
8202    /* Free but don't flush the destination register. */
8203 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8204 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8205
8206 return off;
8207}
8208
8209
8210#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
8211 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
8212
8213/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
8214DECL_INLINE_THROW(uint32_t)
8215iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8216{
8217 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8218 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
8219
8220 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8221 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8222
8223 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8224
8225 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8226
8227 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8228 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8229
8230 return off;
8231}
8232
8233
8234#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
8235 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
8236
8237/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
8238DECL_INLINE_THROW(uint32_t)
8239iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8240{
8241 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8242 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
8243
8244 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8245 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8246
8247 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8248
8249 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8250
8251 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8252 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8253
8254 return off;
8255}
8256
8257
8258#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
8259 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
8260
8261/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
8262DECL_INLINE_THROW(uint32_t)
8263iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8264{
8265 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8266 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8267
8268 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8269 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8270
8271 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8272
8273 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8274
8275    /* Free but don't flush the destination register. */
8276 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8277 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8278
8279 return off;
8280}
8281
8282
8283#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
8284 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
8285
8286/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
8287DECL_INLINE_THROW(uint32_t)
8288iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8289{
8290 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8291 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8292
8293 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8294 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8295
8296 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8297
8298 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8299
8300    /* Free but don't flush the destination register. */
8301 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8302 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8303
8304 return off;
8305}
8306
8307
8308#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
8309 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
8310
8311/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
8312DECL_INLINE_THROW(uint32_t)
8313iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8314{
8315 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8316 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8317
8318 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8319 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8320
8321 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8322
8323 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
8324
8325    /* Free but don't flush the destination register. */
8326 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8327 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8328
8329 return off;
8330}
8331
8332
8333#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
8334 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
8335
8336/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
8337DECL_INLINE_THROW(uint32_t)
8338iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8339{
8340 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8341 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8342
8343 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8344 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8345
8346 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8347
8348 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
8349 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
8350
8351    /* Free but don't flush the destination register. */
8352 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8353 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8354
8355 return off;
8356}
8357
8358
8359#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
8360 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
8361
8362/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
8363DECL_INLINE_THROW(uint32_t)
8364iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8365{
8366 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8367 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8368
8369 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8370 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8371
8372 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8373
8374 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
8375 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
8376
8377    /* Free but don't flush the destination register. */
8378 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8379 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8380
8381 return off;
8382}
8383
8384
8385#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
8386 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
8387
8388/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
8389DECL_INLINE_THROW(uint32_t)
8390iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
8391{
8392 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8393 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8394
8395 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8396 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8397 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
8398 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8399 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8400
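    /* Result: qword 0 = the 64-bit local value, qword 1 = qword 1 of iYRegSrcHx, upper 128 bits zeroed. */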
8401 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
8402 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
8403 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8404
8405 /* Free but don't flush the source and destination registers. */
8406 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
8407 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8408 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8409
8410 return off;
8411}
8412
8413
8414#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
8415 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
8416
8417/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
8418DECL_INLINE_THROW(uint32_t)
8419iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
8420{
8421 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8422 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8423
8424 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8425 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8426 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
8427 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8428 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8429
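    /* Result: qword 0 = qword 0 of iYRegSrcHx, qword 1 = the 64-bit local value, upper 128 bits zeroed. */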
8430 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
8431 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
8432 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8433
8434 /* Free but don't flush the source and destination registers. */
8435 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
8436 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8437 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8438
8439 return off;
8440}
8441
8442
8443#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
8444 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
8445
8446
8447/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
8448DECL_INLINE_THROW(uint32_t)
8449iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
8450{
8451 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8452 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8453
8454 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
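    /* Each bit set in the mask zeroes the corresponding dword element of the XMM register. */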
8455 if (bImm8Mask & RT_BIT(0))
8456 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
8457 if (bImm8Mask & RT_BIT(1))
8458 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
8459 if (bImm8Mask & RT_BIT(2))
8460 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
8461 if (bImm8Mask & RT_BIT(3))
8462 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
8463
8464 /* Free but don't flush the destination register. */
8465 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8466
8467 return off;
8468}
8469
8470
8471#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
8472 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
8473
8474
8475/** Emits code for IEM_MC_FETCH_YREG_U256. */
8476DECL_INLINE_THROW(uint32_t)
8477iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
8478{
8479 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8480 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
8481
8482 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
8483 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
8484 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8485
8486 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
8487
8488 /* Free but don't flush the source register. */
8489 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8490 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8491
8492 return off;
8493}
8494
8495
8496#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
8497 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
8498
8499
8500/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX. */
8501DECL_INLINE_THROW(uint32_t)
8502iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
8503{
8504 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8505 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
8506
8507 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8508 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8509    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
8510
8511 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
8512
8513    /* Free but don't flush the destination register. */
8514 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8515 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8516
8517 return off;
8518}
8519
8520
8521#define IEM_MC_SSE_UPDATE_MXCSR(a_fMxcsr) \
8522 off = iemNativeEmitSimdSseUpdateMxcsr(pReNative, off, a_fMxcsr)
8523
8524/** Emits code for IEM_MC_SSE_UPDATE_MXCSR. */
8525DECL_INLINE_THROW(uint32_t)
8526iemNativeEmitSimdSseUpdateMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxMxCsrVar)
8527{
8528 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxMxCsrVar);
8529 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxMxCsrVar, sizeof(uint32_t));
8530
8531 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ForUpdate);
8532    uint8_t const idxVarRegMxCsr = iemNativeVarRegisterAcquire(pReNative, idxMxCsrVar, &off, true /*fInitialized*/);
8533 uint8_t const idxVarRegTmp = iemNativeRegAllocTmp(pReNative, &off);
8534
8535 /** @todo r=aeichner I think it would be safe to spare the temporary register and trash
8536 * the variable MXCSR register as it isn't used afterwards in the microcode block anyway.
8537 * Needs verification though, so play it safe for now.
8538 */
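    /* OR the exception flags (X86_MXCSR_XCPT_FLAGS) from the variable value into the guest MXCSR; these flag bits are sticky. */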
8539 /* mov tmp, varmxcsr */
8540 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarRegTmp, idxVarRegMxCsr);
8541 /* and tmp, X86_MXCSR_XCPT_FLAGS */
8542 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarRegTmp, X86_MXCSR_XCPT_FLAGS);
8543 /* or mxcsr, tmp */
8544 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxVarRegTmp);
8545
8546 /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
8547 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
8548
8549 /* Free but don't flush the MXCSR register. */
8550 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
8551 iemNativeVarRegisterRelease(pReNative, idxMxCsrVar);
8552 iemNativeRegFreeTmp(pReNative, idxVarRegTmp);
8553
8554 return off;
8555}
8556
8557
8558#define IEM_MC_STORE_SSE_RESULT(a_SseData, a_iXmmReg) \
8559 off = iemNativeEmitSimdSseStoreResult(pReNative, off, a_SseData, a_iXmmReg)
8560
8561/** Emits code for IEM_MC_STORE_SSE_RESULT. */
8562DECL_INLINE_THROW(uint32_t)
8563iemNativeEmitSimdSseStoreResult(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSseDataVar, uint8_t iXReg)
8564{
8565 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSseDataVar);
8566 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSseDataVar, sizeof(IEMSSERESULT));
8567
8568    /** @todo r=aeichner We probably need to rework this MC statement and the users to make things more efficient. */
8569 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8570 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
8571 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ForUpdate);
8572 uint8_t const idxVarRegResAddr = iemNativeRegAllocTmp(pReNative, &off);
8573 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
8574
8575 off = iemNativeEmitLoadArgGregWithVarAddr(pReNative, off, idxVarRegResAddr, idxSseDataVar, false /*fFlushShadows*/);
8576
8577 /* Update MXCSR. */
8578 off = iemNativeEmitLoadGprByGprU32(pReNative, off, idxRegTmp, idxVarRegResAddr, RT_UOFFSETOF_DYN(IEMSSERESULT, MXCSR));
8579 /* tmp &= X86_MXCSR_XCPT_FLAGS. */
8580 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS);
8581 /* mxcsr |= tmp */
8582 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
8583
8584 /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
8585 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
8586
8587 /* Update the value if there is no unmasked exception. */
8588 /* tmp = mxcsr */
8589 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
8590 /* tmp &= X86_MXCSR_XCPT_MASK */
8591 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
8592 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
8593 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
8594 /* tmp = ~tmp */
8595 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
8596 /* tmp &= mxcsr */
8597 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
8598
8599 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS);
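    /* If any unmasked exception flag is set, skip loading the result into the XMM register. */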
8600    uint32_t const offFixup = off;
8601 off = iemNativeEmitJnzToFixed(pReNative, off, off);
8602 AssertCompileMemberSize(IEMSSERESULT, uResult, sizeof(RTFLOAT128U));
8603 off = iemNativeEmitLoadVecRegByGprU128(pReNative, off, idxSimdRegDst, idxVarRegResAddr, RT_UOFFSETOF_DYN(IEMSSERESULT, uResult));
8604 iemNativeFixupFixedJump(pReNative, offFixup, off);
8605
8606 /* Free but don't flush the shadowed register. */
8607 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8608 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
8609 iemNativeRegFreeTmp(pReNative, idxVarRegResAddr);
8610 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8611
8612 return off;
8613}
8614
8615
8616/*********************************************************************************************************************************
8617* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
8618*********************************************************************************************************************************/
8619
8620/**
8621 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX.
8622 */
8623DECL_INLINE_THROW(uint32_t)
8624iemNativeEmitCallSseAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
8625{
8626 /*
8627 * Need to do the FPU preparation.
8628 */
8629 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
8630
8631 /*
8632 * Do all the call setup and cleanup.
8633 */
8634 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS);
8635
8636 /*
8637 * Load the XState::x87 pointer.
8638 */
8639 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, kIemNativeGstRegRef_X87, 0 /*idxRegInClass*/);
8640
8641 /*
8642 * Make the call.
8643 */
8644 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
8645
8646 return off;
8647}
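
/* Illustration only, not generated code: assuming IEM_SSE_AIMPL_HIDDEN_ARGS is 1, a worker invoked
 * via IEM_MC_CALL_SSE_AIMPL_2 ends up being called roughly as
 *     pfnAImpl(&pVCpu->cpum.GstCtx.XState.x87, a0, a1);
 * with the hidden first argument being the XState::x87 pointer loaded into IEMNATIVE_CALL_ARG0_GREG
 * by iemNativeEmitLeaGprByGstRegRef() above. */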
8648
8649
8650#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
8651 off = iemNativeEmitCallSseAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
8652
8653/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
8654DECL_INLINE_THROW(uint32_t)
8655iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
8656{
8657 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8658 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8659 return iemNativeEmitCallSseAImplCommon(pReNative, off, pfnAImpl, 2);
8660}
8661
8662
8663#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
8664 off = iemNativeEmitCallSseAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
8665
8666/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
8667DECL_INLINE_THROW(uint32_t)
8668iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8669{
8670 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8671 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8672 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8673 return iemNativeEmitCallSseAImplCommon(pReNative, off, pfnAImpl, 3);
8674}
8675
8676
8677/*********************************************************************************************************************************
8678* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
8679*********************************************************************************************************************************/
8680
8681/**
8682 * Common worker for IEM_MC_CALL_AVX_AIMPL_XXX.
8683 */
8684DECL_INLINE_THROW(uint32_t)
8685iemNativeEmitCallAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
8686{
8687 /*
8688 * Need to do the FPU preparation.
8689 */
8690 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
8691
8692 /*
8693 * Do all the call setup and cleanup.
8694 */
8695 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_AVX_AIMPL_HIDDEN_ARGS, IEM_AVX_AIMPL_HIDDEN_ARGS);
8696
8697 /*
8698 * Load the XState pointer.
8699 */
8700 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, kIemNativeGstRegRef_XState, 0 /*idxRegInClass*/);
8701
8702 /*
8703 * Make the call.
8704 */
8705 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
8706
8707 return off;
8708}
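
/* Illustration only, not generated code: assuming IEM_AVX_AIMPL_HIDDEN_ARGS is 1, a worker invoked
 * via IEM_MC_CALL_AVX_AIMPL_2 ends up being called roughly as
 *     pfnAImpl(&pVCpu->cpum.GstCtx.XState, a0, a1);
 * here the hidden first argument is the full XState pointer rather than just XState.x87, since the
 * upper YMM halves live outside the legacy x87/SSE area. */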
8709
8710
8711#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
8712 off = iemNativeEmitCallAvxAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
8713
8714/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
8715DECL_INLINE_THROW(uint32_t)
8716iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
8717{
8718 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8719 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8720 return iemNativeEmitCallAvxAImplCommon(pReNative, off, pfnAImpl, 2);
8721}
8722
8723
8724#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
8725 off = iemNativeEmitCallAvxAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
8726
8727/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
8728DECL_INLINE_THROW(uint32_t)
8729iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8730{
8731 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8732 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8733 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8734 return iemNativeEmitCallAvxAImplCommon(pReNative, off, pfnAImpl, 3);
8735}
8736#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
8737
8738
8739/*********************************************************************************************************************************
8740* Include instruction emitters. *
8741*********************************************************************************************************************************/
8742#include "target-x86/IEMAllN8veEmit-x86.h"
8743