VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@104075

Last change on this file was r104075, checked in by vboxsync, 11 months ago

VMM/IEM: Implement native emitter for IEM_MC_FETCH_MEM_U256()/IEM_MC_FETCH_MEM_FLAT_U256(), bugref:10614

1/* $Id: IEMAllN8veRecompFuncs.h 104075 2024-03-27 08:15:42Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs to avoid wasting time on unused ones here.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81
82
83/*********************************************************************************************************************************
84* Code emitters for flushing pending guest register writes and sanity checks *
85*********************************************************************************************************************************/
86
87#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
88# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
89DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
90{
91 /* Compare the shadow with the context value; they should match. */
92 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
93 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
94 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
95 return off;
96}
97# endif
98#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
99
100/**
101 * Flushes delayed write of a specific guest register.
102 *
103 * This must be called prior to calling CImpl functions and any helpers that use
104 * the guest state (like raising exceptions) and such.
105 *
106 * This optimization has not yet been implemented. The first target would be
107 * RIP updates, since these are the most common ones.
108 */
109DECL_INLINE_THROW(uint32_t)
110iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
111{
112#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
113 /* If, for whatever reason, it is possible to reference the PC register at some point, we need to do the writeback here first. */
114#endif
115
116#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
117#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
118 if ( enmClass == kIemNativeGstRegRef_EFlags
119 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
120 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
121#else
122 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
123#endif
124
125 if ( enmClass == kIemNativeGstRegRef_Gpr
126 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
127 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
128#endif
129
130#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
131 if ( enmClass == kIemNativeGstRegRef_XReg
132 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
133 {
134 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
135 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
136 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
137
138 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
139 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
140 }
141#endif
142 RT_NOREF(pReNative, enmClass, idxReg);
143 return off;
144}
145
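/*
 * Usage sketch (hypothetical caller, for illustration only): an emitter that
 * is about to hand out a reference to guest GPR idxGstReg would first flush
 * any delayed write to that register, roughly:
 *
 *     off = iemNativeRegFlushPendingSpecificWrite(pReNative, off,
 *                                                 kIemNativeGstRegRef_Gpr, idxGstReg);
 *
 * so the CPUMCTX copy is up to date before the reference escapes the
 * recompiler's shadow register tracking.
 */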
146
147
148/*********************************************************************************************************************************
149* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
150*********************************************************************************************************************************/
151
152#undef IEM_MC_BEGIN /* unused */
153#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
154 { \
155 Assert(pReNative->Core.bmVars == 0); \
156 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
157 Assert(pReNative->Core.bmStack == 0); \
158 pReNative->fMc = (a_fMcFlags); \
159 pReNative->fCImpl = (a_fCImplFlags); \
160 pReNative->cArgsX = (a_cArgsIncludingHidden)
161
162/** We have to get to the end in recompilation mode, as otherwise we won't
163 * generate code for all the IEM_MC_IF_XXX branches. */
164#define IEM_MC_END() \
165 iemNativeVarFreeAll(pReNative); \
166 } return off
167
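/*
 * Shape sketch (illustrative, parameter names as in the macros above): a
 * recompiled MC block body is bracketed by the two macros, e.g.
 *
 *     IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden);
 *         ... IEM_MC_XXX statements, each expanding to off = iemNativeEmitXxx(pReNative, off, ...) ...
 *     IEM_MC_END();
 *
 * IEM_MC_BEGIN_EX opens the scope, asserts a clean variable/stack state and
 * seeds fMc/fCImpl/cArgsX, while IEM_MC_END frees all variables and returns
 * the final code buffer offset.
 */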
168
169
170/*********************************************************************************************************************************
171* Native Emitter Support. *
172*********************************************************************************************************************************/
173
174#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
175
176#define IEM_MC_NATIVE_ELSE() } else {
177
178#define IEM_MC_NATIVE_ENDIF() } ((void)0)
179
180
181#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
182 off = a_fnEmitter(pReNative, off)
183
184#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
185 off = a_fnEmitter(pReNative, off, (a0))
186
187#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
188 off = a_fnEmitter(pReNative, off, (a0), (a1))
189
190#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
192
193#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
194 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
195
196#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
197 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
198
199#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
200 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
201
202#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
203 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
204
205#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
206 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
207
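/*
 * Usage sketch (hypothetical emitter name, assuming the usual
 * RT_ARCH_VAL_AMD64/RT_ARCH_VAL_ARM64 host flags):
 *
 *     IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
 *         IEM_MC_NATIVE_EMIT_2(iemNativeEmit_example_worker, idxVarDst, idxVarSrc);
 *     IEM_MC_NATIVE_ELSE()
 *         ... generic IEM_MC_XXX fallback statements ...
 *     IEM_MC_NATIVE_ENDIF();
 *
 * The IF/ELSE is resolved against the compile-time constant RT_ARCH_VAL, so
 * only one branch survives, and the emitter callback simply appends
 * instructions at the current offset and returns the new one.
 */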
208
209
210/*********************************************************************************************************************************
211* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
212*********************************************************************************************************************************/
213
214#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
215 pReNative->fMc = 0; \
216 pReNative->fCImpl = (a_fFlags); \
217 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
218
219
220#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
221 pReNative->fMc = 0; \
222 pReNative->fCImpl = (a_fFlags); \
223 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
224
225DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
226 uint8_t idxInstr, uint64_t a_fGstShwFlush,
227 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
228{
229 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
230}
231
232
233#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
234 pReNative->fMc = 0; \
235 pReNative->fCImpl = (a_fFlags); \
236 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
237 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
238
239DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
240 uint8_t idxInstr, uint64_t a_fGstShwFlush,
241 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
242{
243 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
244}
245
246
247#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
248 pReNative->fMc = 0; \
249 pReNative->fCImpl = (a_fFlags); \
250 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
251 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
252
253DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
254 uint8_t idxInstr, uint64_t a_fGstShwFlush,
255 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
256 uint64_t uArg2)
257{
258 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
259}
260
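/*
 * Expansion sketch (illustrative): an instruction deferred to a C
 * implementation with one argument collapses into a single helper call in
 * the generated emitter, roughly
 *
 *     pReNative->fMc    = 0;
 *     pReNative->fCImpl = fFlags;
 *     return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr,
 *                                    fGstShwFlush, (uintptr_t)pfnCImpl, cbInstr, uArg0);
 *
 * i.e. the whole MC block becomes one iemNativeEmitCImplCall() invocation
 * with the argument count baked in.
 */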
261
262
263/*********************************************************************************************************************************
264* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
265*********************************************************************************************************************************/
266
267/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
268 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
269DECL_INLINE_THROW(uint32_t)
270iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
271{
272 /*
273 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
274 * return with a special status code and make the execution loop deal with
275 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
276 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
277 * could continue w/o interruption, it probably will drop into the
278 * debugger, so it's not worth the effort of trying to service it here; we
279 * just lump it in with the handling of the others.
280 *
281 * To simplify the code and the register state management even more (wrt
282 * the immediate in the AND operation), we always update the flags and skip
283 * the conditional jump associated with the extra check.
284 */
285 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
286 <= UINT32_MAX);
287#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
288 AssertMsg( pReNative->idxCurCall == 0
289 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
290 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
291#endif
292
293 /*
294 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label
295 * any pending register writes must be flushed.
296 */
297 off = iemNativeRegFlushPendingWrites(pReNative, off);
298
299 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
300 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
301 true /*fSkipLivenessAssert*/);
302 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
303 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
304 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
305 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
306 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
307
308 /* Free but don't flush the EFLAGS register. */
309 iemNativeRegFreeTmp(pReNative, idxEflReg);
310
311 return off;
312}
313
314
315/** Emits the finish-with-status handling; generates no code for the VINF_SUCCESS case. */
316template<int const a_rcNormal>
317DECL_FORCE_INLINE(uint32_t)
318iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
319{
320 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
321 if (a_rcNormal != VINF_SUCCESS)
322 {
323#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
324 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
325#else
326 RT_NOREF_PV(idxInstr);
327#endif
328
329 /* As this code returns from the TB any pending register writes must be flushed. */
330 off = iemNativeRegFlushPendingWrites(pReNative, off);
331
332 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
333 }
334 return off;
335}
336
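/*
 * Note on the template above (illustrative): a_rcNormal is a compile-time
 * constant, so the VINF_SUCCESS instantiation generates no code at all,
 * whereas
 *
 *     off = iemNativeEmitFinishInstructionWithStatus<VINF_IEM_REEXEC_BREAK>(pReNative, off, pCallEntry->idxInstr);
 *
 * records the current instruction number (when instruction counting is
 * enabled), flushes pending register writes and jumps to the ReturnBreak
 * label, exiting the TB with that status.
 */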
337
338#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
339 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
340 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
341
342#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
343 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
344 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
345 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
346
347/** Same as iemRegAddToRip64AndFinishingNoFlags. */
348DECL_INLINE_THROW(uint32_t)
349iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
350{
351#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
352# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
353 if (!pReNative->Core.offPc)
354 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
355# endif
356
357 /* Allocate a temporary PC register. */
358 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
359
360 /* Perform the addition and store the result. */
361 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
362 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
363
364 /* Free but don't flush the PC register. */
365 iemNativeRegFreeTmp(pReNative, idxPcReg);
366#endif
367
368#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
369 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
370
371 pReNative->Core.offPc += cbInstr;
372# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
373 off = iemNativePcAdjustCheck(pReNative, off);
374# endif
375 if (pReNative->cCondDepth)
376 off = iemNativeEmitPcWriteback(pReNative, off);
377 else
378 pReNative->Core.cInstrPcUpdateSkipped++;
379#endif
380
381 return off;
382}
383
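/*
 * Delayed PC updating in a nutshell (sketch of what the code above does):
 * with IEMNATIVE_WITH_DELAYED_PC_UPDATING no RIP update is emitted for
 * straight-line code; the emitter only accumulates
 *
 *     pReNative->Core.offPc += cbInstr;
 *     pReNative->Core.cInstrPcUpdateSkipped++;
 *
 * and the accumulated offset is written back in one go by
 * iemNativeEmitPcWriteback() when inside a conditional block or whenever an
 * up-to-date CPUMCTX.rip is required (TB exits, CImpl calls and the like).
 */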
384
385#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
386 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
387 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
388
389#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
390 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
391 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
392 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
393
394/** Same as iemRegAddToEip32AndFinishingNoFlags. */
395DECL_INLINE_THROW(uint32_t)
396iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
397{
398#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
399# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
400 if (!pReNative->Core.offPc)
401 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
402# endif
403
404 /* Allocate a temporary PC register. */
405 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
406
407 /* Perform the addition and store the result. */
408 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
409 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
410
411 /* Free but don't flush the PC register. */
412 iemNativeRegFreeTmp(pReNative, idxPcReg);
413#endif
414
415#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
416 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
417
418 pReNative->Core.offPc += cbInstr;
419# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
420 off = iemNativePcAdjustCheck(pReNative, off);
421# endif
422 if (pReNative->cCondDepth)
423 off = iemNativeEmitPcWriteback(pReNative, off);
424 else
425 pReNative->Core.cInstrPcUpdateSkipped++;
426#endif
427
428 return off;
429}
430
431
432#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
433 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
434 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
435
436#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
437 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
438 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
439 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
440
441/** Same as iemRegAddToIp16AndFinishingNoFlags. */
442DECL_INLINE_THROW(uint32_t)
443iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
444{
445#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
446# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
447 if (!pReNative->Core.offPc)
448 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
449# endif
450
451 /* Allocate a temporary PC register. */
452 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
453
454 /* Perform the addition and store the result. */
455 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
456 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
457 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
458
459 /* Free but don't flush the PC register. */
460 iemNativeRegFreeTmp(pReNative, idxPcReg);
461#endif
462
463#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
464 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
465
466 pReNative->Core.offPc += cbInstr;
467# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
468 off = iemNativePcAdjustCheck(pReNative, off);
469# endif
470 if (pReNative->cCondDepth)
471 off = iemNativeEmitPcWriteback(pReNative, off);
472 else
473 pReNative->Core.cInstrPcUpdateSkipped++;
474#endif
475
476 return off;
477}
478
479
480
481/*********************************************************************************************************************************
482* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
483*********************************************************************************************************************************/
484
485#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
486 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
487 (a_enmEffOpSize), pCallEntry->idxInstr); \
488 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
489
490#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
491 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
492 (a_enmEffOpSize), pCallEntry->idxInstr); \
493 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
494 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
495
496#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
497 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
498 IEMMODE_16BIT, pCallEntry->idxInstr); \
499 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
500
501#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
502 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
503 IEMMODE_16BIT, pCallEntry->idxInstr); \
504 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
505 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
506
507#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
508 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
509 IEMMODE_64BIT, pCallEntry->idxInstr); \
510 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
511
512#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
513 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
514 IEMMODE_64BIT, pCallEntry->idxInstr); \
515 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
516 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
517
518/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
519 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
520 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
521DECL_INLINE_THROW(uint32_t)
522iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
523 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
524{
525 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
526
527 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
528 off = iemNativeRegFlushPendingWrites(pReNative, off);
529
530#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
531 Assert(pReNative->Core.offPc == 0);
532
533 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
534#endif
535
536 /* Allocate a temporary PC register. */
537 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
538
539 /* Perform the addition. */
540 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
541
542 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
543 {
544 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
545 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
546 }
547 else
548 {
549 /* Just truncate the result to 16-bit IP. */
550 Assert(enmEffOpSize == IEMMODE_16BIT);
551 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
552 }
553 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
554
555 /* Free but don't flush the PC register. */
556 iemNativeRegFreeTmp(pReNative, idxPcReg);
557
558 return off;
559}
560
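/*
 * Worked example (hypothetical operands): a 2-byte "jmp short +0x10" in
 * 64-bit code with 64-bit operand size would come in as
 *
 *     IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(0x10, 2, IEMMODE_64BIT, VINF_SUCCESS);
 *
 * The emitter above then adds 0x12 (disp + cbInstr) to the PC register,
 * emits the canonical-address check (possible #GP(0) + TB exit) and stores
 * the result to CPUMCTX.rip.
 */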
561
562#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
563 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
564 (a_enmEffOpSize), pCallEntry->idxInstr); \
565 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
566
567#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
568 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
569 (a_enmEffOpSize), pCallEntry->idxInstr); \
570 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
571 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
572
573#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
574 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
575 IEMMODE_16BIT, pCallEntry->idxInstr); \
576 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
577
578#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
579 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
580 IEMMODE_16BIT, pCallEntry->idxInstr); \
581 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
582 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
583
584#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
585 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
586 IEMMODE_32BIT, pCallEntry->idxInstr); \
587 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
588
589#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
590 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
591 IEMMODE_32BIT, pCallEntry->idxInstr); \
592 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
593 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
594
595/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
596 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
597 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
598DECL_INLINE_THROW(uint32_t)
599iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
600 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
601{
602 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
603
604 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
605 off = iemNativeRegFlushPendingWrites(pReNative, off);
606
607#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
608 Assert(pReNative->Core.offPc == 0);
609
610 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
611#endif
612
613 /* Allocate a temporary PC register. */
614 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
615
616 /* Perform the addition. */
617 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
618
619 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
620 if (enmEffOpSize == IEMMODE_16BIT)
621 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
622
623 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
624/** @todo we can skip this in 32-bit FLAT mode. */
625 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
626
627 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
628
629 /* Free but don't flush the PC register. */
630 iemNativeRegFreeTmp(pReNative, idxPcReg);
631
632 return off;
633}
634
635
636#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
637 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
638 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
639
640#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
641 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
642 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
643 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
644
645#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
646 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
647 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
648
649#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
650 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
651 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
652 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
653
654#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
655 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
656 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
657
658#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
659 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
660 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
661 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
662
663/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
664DECL_INLINE_THROW(uint32_t)
665iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
666 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
667{
668 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
669 off = iemNativeRegFlushPendingWrites(pReNative, off);
670
671#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
672 Assert(pReNative->Core.offPc == 0);
673
674 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
675#endif
676
677 /* Allocate a temporary PC register. */
678 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
679
680 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
681 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
682 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
683 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
684 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
685
686 /* Free but don't flush the PC register. */
687 iemNativeRegFreeTmp(pReNative, idxPcReg);
688
689 return off;
690}
691
692
693
694/*********************************************************************************************************************************
695* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
696*********************************************************************************************************************************/
697
698/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
699#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
700 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
701
702/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
703#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
704 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
705
706/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
707#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
708 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
709
710/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
711 * clears flags. */
712#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
713 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
714 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
715
716/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
717 * clears flags. */
718#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
719 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
720 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
721
722/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
723 * clears flags. */
724#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
725 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
726 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
727
728#undef IEM_MC_SET_RIP_U16_AND_FINISH
729
730
731/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
732#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
733 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
734
735/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
736#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
737 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
738
739/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
740 * clears flags. */
741#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
742 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
743 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
744
745/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
746 * and clears flags. */
747#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
748 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
749 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
750
751#undef IEM_MC_SET_RIP_U32_AND_FINISH
752
753
754/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
755#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
756 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
757
758/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
759 * and clears flags. */
760#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
761 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
762 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
763
764#undef IEM_MC_SET_RIP_U64_AND_FINISH
765
766
767/** Same as iemRegRipJumpU16AndFinishNoFlags,
768 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
769DECL_INLINE_THROW(uint32_t)
770iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
771 uint8_t idxInstr, uint8_t cbVar)
772{
773 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
774 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
775
776 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
777 off = iemNativeRegFlushPendingWrites(pReNative, off);
778
779#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
780 Assert(pReNative->Core.offPc == 0);
781
782 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
783#endif
784
785 /* Get a register with the new PC loaded from idxVarPc.
786 Note! This ASSUMES that the high bits of the GPR are zeroed. */
787 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
788
789 /* Check limit (may #GP(0) + exit TB). */
790 if (!f64Bit)
791/** @todo we can skip this test in FLAT 32-bit mode. */
792 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
793 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
794 else if (cbVar > sizeof(uint32_t))
795 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
796
797 /* Store the result. */
798 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
799
800 iemNativeVarRegisterRelease(pReNative, idxVarPc);
801 /** @todo implicitly free the variable? */
802
803 return off;
804}
805
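/*
 * Illustration (hypothetical source): for a "jmp rax" in 64-bit code the
 * threaded call would be something like
 *
 *     IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(u64NewRip);
 *
 * and the emitter above then only needs the canonical-address check.  The
 * 16/32-bit variants instead check the new IP/EIP against the CS limit,
 * while a 32-bit value in 64-bit code needs no check at all, since the
 * zero-extended value is always canonical.
 */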
806
807
808/*********************************************************************************************************************************
809* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
810*********************************************************************************************************************************/
811
812#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
813 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
814
815/**
816 * Emits code to check if a \#NM exception should be raised.
817 *
818 * @returns New code buffer offset, UINT32_MAX on failure.
819 * @param pReNative The native recompile state.
820 * @param off The code buffer offset.
821 * @param idxInstr The current instruction.
822 */
823DECL_INLINE_THROW(uint32_t)
824iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
825{
826#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
827 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
828
829 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
830 {
831#endif
832 /*
833 * Make sure we don't have any outstanding guest register writes as we may
834 * raise an #NM and all guest registers must be up to date in CPUMCTX.
835 */
836 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
837 off = iemNativeRegFlushPendingWrites(pReNative, off);
838
839#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
840 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
841#else
842 RT_NOREF(idxInstr);
843#endif
844
845 /* Allocate a temporary CR0 register. */
846 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
847 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
848
849 /*
850 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
851 * return raisexcpt();
852 */
853 /* Test and jump. */
854 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
855
856 /* Free but don't flush the CR0 register. */
857 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
858
859#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
860 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
861 }
862 else
863 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
864#endif
865
866 return off;
867}
868
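/*
 * Rough C equivalent of the check emitted above (sketch only, cf. the
 * interpreted IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE):
 *
 *     if (pVCpu->cpum.GstCtx.cr0 & (X86_CR0_EM | X86_CR0_TS))
 *         return iemRaiseDeviceNotAvailable(pVCpu);
 *
 * except that the native code jumps to the shared RaiseNm label rather than
 * calling the raise helper directly.
 */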
869
870#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
871 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
872
873/**
874 * Emits code to check if a \#MF exception should be raised.
875 *
876 * @returns New code buffer offset, UINT32_MAX on failure.
877 * @param pReNative The native recompile state.
878 * @param off The code buffer offset.
879 * @param idxInstr The current instruction.
880 */
881DECL_INLINE_THROW(uint32_t)
882iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
883{
884 /*
885 * Make sure we don't have any outstanding guest register writes as we may
886 * raise an #MF and all guest registers must be up to date in CPUMCTX.
887 */
888 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
889 off = iemNativeRegFlushPendingWrites(pReNative, off);
890
891#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
892 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
893#else
894 RT_NOREF(idxInstr);
895#endif
896
897 /* Allocate a temporary FSW register. */
898 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
899 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
900
901 /*
902 * if ((FSW & X86_FSW_ES) != 0)
903 * return raisexcpt();
904 */
905 /* Test and jump. */
906 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFpuFswReg, X86_FSW_ES, idxLabelRaiseMf);
907
908 /* Free but don't flush the FSW register. */
909 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
910
911 return off;
912}
913
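/*
 * Rough C equivalent of the check emitted above (sketch only; exact guest
 * state path and raise helper assumed):
 *
 *     if (pVCpu->cpum.GstCtx.XState.x87.FSW & X86_FSW_ES)
 *         return iemRaiseMathFault(pVCpu);
 *
 * i.e. a pending x87 exception summary bit (FSW.ES) turns the instruction
 * into a #MF.
 */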
914
915#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
916 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
917
918/**
919 * Emits code to check if a SSE exception (either \#UD or \#NM) should be raised.
920 *
921 * @returns New code buffer offset, UINT32_MAX on failure.
922 * @param pReNative The native recompile state.
923 * @param off The code buffer offset.
924 * @param idxInstr The current instruction.
925 */
926DECL_INLINE_THROW(uint32_t)
927iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
928{
929#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
930 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
931
932 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
933 {
934#endif
935 /*
936 * Make sure we don't have any outstanding guest register writes as we may
937 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
938 */
939 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
940 off = iemNativeRegFlushPendingWrites(pReNative, off);
941
942#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
943 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
944#else
945 RT_NOREF(idxInstr);
946#endif
947
948 /* Allocate a temporary CR0 and CR4 register. */
949 uint8_t const idxLabelRaiseSseRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseRelated);
950 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
951 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
952 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
953
954 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
955#ifdef RT_ARCH_AMD64
956 /*
957 * We do a modified test here:
958 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
959 * else { goto RaiseSseRelated; }
960 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
961 * all targets except the 386, and since the 386 doesn't support SSE,
962 * this should be a safe assumption.
963 */
964 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
965 //pCodeBuf[off++] = 0xcc;
966 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
967 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
968 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
969 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
970 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
971 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseSseRelated, kIemNativeInstrCond_ne);
972
973#elif defined(RT_ARCH_ARM64)
974 /*
975 * We do a modified test here:
976 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
977 * else { goto RaiseSseRelated; }
978 */
979 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
980 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
981 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
982 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
983 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
984 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
985 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
986 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
987 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
988 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
989 idxLabelRaiseSseRelated);
990
991#else
992# error "Port me!"
993#endif
994
995 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
996 iemNativeRegFreeTmp(pReNative, idxTmpReg);
997 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
998 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
999
1000#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1001 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
1002 }
1003 else
1004 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
1005#endif
1006
1007 return off;
1008}
1009
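/*
 * Worked example of the AMD64 variant above (values for the "all good" case):
 *     tmp  = X86_CR4_OSFXSR                            ; bit 9
 *     tmp &= cr4                                       ; keeps CR4.OSFXSR only
 *     tmp |= cr0                                       ; merges in CR0.EM (bit 2) and CR0.TS (bit 3)
 *     tmp &= X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR
 *     tmp ^= X86_CR4_OSFXSR                            ; zero iff OSFXSR=1, EM=0, TS=0
 * so any non-zero result means an SSE related \#UD/\#NM must be raised and
 * the code jumps to the RaiseSseRelated label.
 */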
1010
1011#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
1012 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
1013
1014/**
1015 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
1016 *
1017 * @returns New code buffer offset, UINT32_MAX on failure.
1018 * @param pReNative The native recompile state.
1019 * @param off The code buffer offset.
1020 * @param idxInstr The current instruction.
1021 */
1022DECL_INLINE_THROW(uint32_t)
1023iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1024{
1025#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1026 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
1027
1028 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
1029 {
1030#endif
1031 /*
1032 * Make sure we don't have any outstanding guest register writes as we may
1033 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
1034 */
1035 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
1036 off = iemNativeRegFlushPendingWrites(pReNative, off);
1037
1038#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1039 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1040#else
1041 RT_NOREF(idxInstr);
1042#endif
1043
1044 /* Allocate a temporary CR0, CR4 and XCR0 register. */
1045 uint8_t const idxLabelRaiseAvxRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseAvxRelated);
1046 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
1047 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
1048 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
1049 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1050
1051 /*
1052 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
1053 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
1054 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
1055 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
1056 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
1057 * { likely }
1058 * else { goto RaiseAvxRelated; }
1059 */
1060#ifdef RT_ARCH_AMD64
1061 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
1062 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
1063 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
1064 ^ 0x1a) ) { likely }
1065 else { goto RaiseAvxRelated; } */
1066 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
1067 //pCodeBuf[off++] = 0xcc;
1068 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
1069 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
1070 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
1071 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1072 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
1073 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
1074 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1075 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
1076 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
1077 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
1078 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseAvxRelated, kIemNativeInstrCond_ne);
1079
1080#elif defined(RT_ARCH_ARM64)
1081 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
1082 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
1083 else { goto RaiseAvxRelated; } */
1084 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
1085 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
1086 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
1087 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
1088 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
1089 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
1090 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
1091 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
1092 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
1093 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
1094 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
1095 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
1096 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
1097 idxLabelRaiseAvxRelated);
1098
1099#else
1100# error "Port me!"
1101#endif
1102
1103 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1104 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1105 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1106 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
1107#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1108 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
1109 }
1110 else
1111 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
1112#endif
1113
1114 return off;
1115}
1116
1117
1118#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1119#define IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
1120 off = iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(pReNative, off)
1121
1122/** Emits code for IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT. */
1123DECL_INLINE_THROW(uint32_t)
1124iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1125{
1126 uint8_t const idxLabelRaiseSseAvxFpRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseAvxFpRelated);
1127 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
1128 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
1129
1130 /* mov tmp, varmxcsr */
1131 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
1132 /* tmp &= X86_MXCSR_XCPT_MASK */
1133 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
1134 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
1135 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
1136 /* tmp = ~tmp */
1137 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
1138 /* tmp &= mxcsr */
1139 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
1140 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
1141 idxLabelRaiseSseAvxFpRelated);
1142
1143 /* Free but don't flush the MXCSR register. */
1144 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
1145 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1146
1147 return off;
1148}
1149#endif
1150
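/*
 * Rough C equivalent of the MXCSR test emitted above (sketch only):
 *
 *     uint32_t const fMxcsr = <guest MXCSR>;
 *     if (fMxcsr & X86_MXCSR_XCPT_FLAGS & ~((fMxcsr & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT))
 *         -> jump to the RaiseSseAvxFpRelated label;
 *
 * i.e. any exception flag whose corresponding mask bit is clear means an
 * unmasked SIMD FP exception is pending.
 */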
1151
1152#define IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
1153 off = iemNativeEmitRaiseSseAvxSimdFpXcpt(pReNative, off, pCallEntry->idxInstr)
1154
1155/**
1156 * Emits code to raise a SIMD floating point exception (either \#UD or \#XF).
1157 *
1158 * @returns New code buffer offset, UINT32_MAX on failure.
1159 * @param pReNative The native recompile state.
1160 * @param off The code buffer offset.
1161 * @param idxInstr The current instruction.
1162 */
1163DECL_INLINE_THROW(uint32_t)
1164iemNativeEmitRaiseSseAvxSimdFpXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1165{
1166 /*
1167 * Make sure we don't have any outstanding guest register writes as we may
1168 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
1169 */
1170 off = iemNativeRegFlushPendingWrites(pReNative, off);
1171
1172#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1173 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1174#else
1175 RT_NOREF(idxInstr);
1176#endif
1177
1178 /* Allocate a temporary CR4 register. */
1179 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
1180 uint8_t const idxLabelRaiseXf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseXf);
1181 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
1182
1183 /*
1184 * if (!(cr4 & X86_CR4_OSXMMEEXCPT))
1185 * return raisexcpt();
1186 */
1187 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr4Reg, X86_CR4_OSXMMEEXCPT_BIT, idxLabelRaiseXf);
1188
1189 /* raise \#UD exception unconditionally. */
1190 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseUd);
1191
1192 /* Free but don't flush the CR4 register. */
1193 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1194
1195 return off;
1196}
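
/*
 * Architecturally, the choice emitted above is the one below (illustrative
 * sketch only; the raise helper names follow the interpreter side and are
 * shown merely for orientation):
 */
#if 0 /* illustration only, not built */
    if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
        return iemRaiseSimdFpException(pVCpu);  /* \#XF */
    return iemRaiseUndefinedOpcode(pVCpu);      /* \#UD */
#endif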
1197
1198
1199#define IEM_MC_RAISE_DIVIDE_ERROR() \
1200 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
1201
1202/**
1203 * Emits code to raise a \#DE.
1204 *
1205 * @returns New code buffer offset, UINT32_MAX on failure.
1206 * @param pReNative The native recompile state.
1207 * @param off The code buffer offset.
1208 * @param idxInstr The current instruction.
1209 */
1210DECL_INLINE_THROW(uint32_t)
1211iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1212{
1213 /*
1214 * Make sure we don't have any outstanding guest register writes as we may raise a \#DE and all guest registers must be up to date in CPUMCTX.
1215 */
1216 off = iemNativeRegFlushPendingWrites(pReNative, off);
1217
1218#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1219 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1220#else
1221 RT_NOREF(idxInstr);
1222#endif
1223
1224 uint8_t const idxLabelRaiseDe = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseDe);
1225
1226 /* raise \#DE exception unconditionally. */
1227 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseDe);
1228
1229 return off;
1230}
1231
1232
1233/*********************************************************************************************************************************
1234* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
1235*********************************************************************************************************************************/
1236
1237/**
1238 * Pushes an IEM_MC_IF_XXX onto the condition stack.
1239 *
1240 * @returns Pointer to the condition stack entry on success, NULL on failure
1241 * (too many nestings)
1242 */
1243DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
1244{
1245#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1246 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
1247#endif
1248
1249 uint32_t const idxStack = pReNative->cCondDepth;
1250 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
1251
1252 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
1253 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
1254
1255 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
1256 pEntry->fInElse = false;
1257 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
1258 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
1259
1260 return pEntry;
1261}
1262
1263
1264/**
1265 * Start of the if-block, snapshotting the register and variable state.
1266 */
1267DECL_INLINE_THROW(void)
1268iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
1269{
1270 Assert(offIfBlock != UINT32_MAX);
1271 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1272 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1273 Assert(!pEntry->fInElse);
1274
1275 /* Define the start of the IF block if requested or for disassembly purposes. */
1276 if (idxLabelIf != UINT32_MAX)
1277 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
1278#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1279 else
1280 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
1281#else
1282 RT_NOREF(offIfBlock);
1283#endif
1284
1285#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1286 Assert(pReNative->Core.offPc == 0);
1287#endif
1288
1289 /* Copy the initial state so we can restore it in the 'else' block. */
1290 pEntry->InitialState = pReNative->Core;
1291}
1292
1293
1294#define IEM_MC_ELSE() } while (0); \
1295 off = iemNativeEmitElse(pReNative, off); \
1296 do {
1297
1298/** Emits code related to IEM_MC_ELSE. */
1299DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1300{
1301 /* Check sanity and get the conditional stack entry. */
1302 Assert(off != UINT32_MAX);
1303 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1304 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1305 Assert(!pEntry->fInElse);
1306
1307#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1308 /* Writeback any dirty shadow registers. */
1309 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
1310 * in one of the branches and leave guest registers already dirty before the start of the if
1311 * block alone. */
1312 off = iemNativeRegFlushDirtyGuest(pReNative, off);
1313#endif
1314
1315 /* Jump to the endif */
1316 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
1317
1318 /* Define the else label and enter the else part of the condition. */
1319 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1320 pEntry->fInElse = true;
1321
1322#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1323 Assert(pReNative->Core.offPc == 0);
1324#endif
1325
1326 /* Snapshot the core state so we can do a merge at the endif and restore
1327 the snapshot we took at the start of the if-block. */
1328 pEntry->IfFinalState = pReNative->Core;
1329 pReNative->Core = pEntry->InitialState;
1330
1331 return off;
1332}
1333
1334
1335#define IEM_MC_ENDIF() } while (0); \
1336 off = iemNativeEmitEndIf(pReNative, off)
1337
1338/** Emits code related to IEM_MC_ENDIF. */
1339DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1340{
1341 /* Check sanity and get the conditional stack entry. */
1342 Assert(off != UINT32_MAX);
1343 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1344 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1345
1346#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1347 Assert(pReNative->Core.offPc == 0);
1348#endif
1349#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1350 /* Writeback any dirty shadow registers (else branch). */
1351 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
1352 * in one of the branches and leave guest registers already dirty before the start of the if
1353 * block alone. */
1354 off = iemNativeRegFlushDirtyGuest(pReNative, off);
1355#endif
1356
1357 /*
1358 * Now we have to find common ground with the core state at the end of the
1359 * if-block. Use the smallest common denominator and just drop anything
1360 * that isn't the same in both states.
1361 */
1362 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
1363 * which is why we're doing this at the end of the else-block.
1364 * But we'd need more info about the future for that to be worth the effort. */
1365 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
1366#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1367 Assert( pOther->bmGstRegShadowDirty == 0
1368 && pReNative->Core.bmGstRegShadowDirty == 0);
1369#endif
1370
1371 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
1372 {
1373 /* shadow guest stuff first. */
1374 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
1375 if (fGstRegs)
1376 {
1377 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
1378 do
1379 {
1380 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
1381 fGstRegs &= ~RT_BIT_64(idxGstReg);
1382
1383 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
1384 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
1385 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
1386 {
1387 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
1388 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
1389
1390#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1391 /* Writeback any dirty shadow registers we are about to unshadow. */
1392 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
1393#endif
1394 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
1395 }
1396 } while (fGstRegs);
1397 }
1398 else
1399 {
1400 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
1401#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1402 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1403#endif
1404 }
1405
1406 /* Check variables next. For now we must require them to be identical
1407 or stuff we can recreate. */
1408 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
1409 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
1410 if (fVars)
1411 {
1412 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
1413 do
1414 {
1415 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
1416 fVars &= ~RT_BIT_32(idxVar);
1417
1418 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
1419 {
1420 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
1421 continue;
1422 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
1423 {
1424 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1425 if (idxHstReg != UINT8_MAX)
1426 {
1427 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1428 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1429 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
1430 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1431 }
1432 continue;
1433 }
1434 }
1435 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
1436 continue;
1437
1438 /* Irreconcilable, so drop it. */
1439 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1440 if (idxHstReg != UINT8_MAX)
1441 {
1442 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1443 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1444 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
1445 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1446 }
1447 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1448 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
1449 } while (fVars);
1450 }
1451
1452 /* Finally, check that the host register allocations matches. */
1453 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
1454 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
1455 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
1456 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
1457 }
1458
1459 /*
1460 * Define the endif label and maybe the else one if we're still in the 'if' part.
1461 */
1462 if (!pEntry->fInElse)
1463 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1464 else
1465 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
1466 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
1467
1468 /* Pop the conditional stack. */
1469 pReNative->cCondDepth -= 1;
1470
1471 return off;
1472}
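
/*
 * For orientation: the IEM_MC_IF_XXX / IEM_MC_ELSE / IEM_MC_ENDIF emitters
 * above are driven by MC blocks written roughly like the sketch below (the
 * real blocks live in the instruction templates; this one is made up):
 */
#if 0 /* illustration only, not built */
    IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
        /* if-block: emitted against a snapshot of the register/variable state */
    } IEM_MC_ELSE() {
        /* else-block: starts again from the snapshot taken at the IF */
    } IEM_MC_ENDIF();
    /* at ENDIF the two states are merged down to their common denominator */
#endif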
1473
1474
1475#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
1476 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
1477 do {
1478
1479/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
1480DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1481{
1482 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1483 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1484
1485 /* Get the eflags. */
1486 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1487 kIemNativeGstRegUse_ReadOnly);
1488
1489 /* Test and jump. */
1490 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1491
1492 /* Free but don't flush the EFlags register. */
1493 iemNativeRegFreeTmp(pReNative, idxEflReg);
1494
1495 /* Make a copy of the core state now as we start the if-block. */
1496 iemNativeCondStartIfBlock(pReNative, off);
1497
1498 return off;
1499}
1500
1501
1502#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
1503 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
1504 do {
1505
1506/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
1507DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1508{
1509 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1510 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1511
1512 /* Get the eflags. */
1513 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1514 kIemNativeGstRegUse_ReadOnly);
1515
1516 /* Test and jump. */
1517 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1518
1519 /* Free but don't flush the EFlags register. */
1520 iemNativeRegFreeTmp(pReNative, idxEflReg);
1521
1522 /* Make a copy of the core state now as we start the if-block. */
1523 iemNativeCondStartIfBlock(pReNative, off);
1524
1525 return off;
1526}
1527
1528
1529#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
1530 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
1531 do {
1532
1533/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
1534DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1535{
1536 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1537 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1538
1539 /* Get the eflags. */
1540 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1541 kIemNativeGstRegUse_ReadOnly);
1542
1543 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1544 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1545
1546 /* Test and jump. */
1547 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1548
1549 /* Free but don't flush the EFlags register. */
1550 iemNativeRegFreeTmp(pReNative, idxEflReg);
1551
1552 /* Make a copy of the core state now as we start the if-block. */
1553 iemNativeCondStartIfBlock(pReNative, off);
1554
1555 return off;
1556}
1557
1558
1559#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
1560 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
1561 do {
1562
1563/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
1564DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1565{
1566 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1567 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1568
1569 /* Get the eflags. */
1570 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1571 kIemNativeGstRegUse_ReadOnly);
1572
1573 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1574 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1575
1576 /* Test and jump. */
1577 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1578
1579 /* Free but don't flush the EFlags register. */
1580 iemNativeRegFreeTmp(pReNative, idxEflReg);
1581
1582 /* Make a copy of the core state now as we start the if-block. */
1583 iemNativeCondStartIfBlock(pReNative, off);
1584
1585 return off;
1586}
1587
1588
1589#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
1590 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
1591 do {
1592
1593#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
1594 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
1595 do {
1596
1597/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
1598DECL_INLINE_THROW(uint32_t)
1599iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1600 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1601{
1602 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
1603 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1604
1605 /* Get the eflags. */
1606 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1607 kIemNativeGstRegUse_ReadOnly);
1608
1609 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1610 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1611
1612 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1613 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1614 Assert(iBitNo1 != iBitNo2);
1615
1616#ifdef RT_ARCH_AMD64
1617 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
1618
1619 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1620 if (iBitNo1 > iBitNo2)
1621 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1622 else
1623 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1624 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1625
1626#elif defined(RT_ARCH_ARM64)
1627 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1628 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1629
1630 /* and tmpreg, eflreg, #1<<iBitNo1 */
1631 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1632
1633 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1634 if (iBitNo1 > iBitNo2)
1635 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1636 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1637 else
1638 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1639 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1640
1641 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1642
1643#else
1644# error "Port me"
1645#endif
1646
1647 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1648 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1649 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1650
1651 /* Free but don't flush the EFlags and tmp registers. */
1652 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1653 iemNativeRegFreeTmp(pReNative, idxEflReg);
1654
1655 /* Make a copy of the core state now as we start the if-block. */
1656 iemNativeCondStartIfBlock(pReNative, off);
1657
1658 return off;
1659}
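
/*
 * The AND/shift/XOR trick above, spelled out as plain C for reference (an
 * illustrative sketch only; iBitNo1 and iBitNo2 are the two EFLAGS bit
 * numbers):
 */
#if 0 /* illustration only, not built */
DECLINLINE(bool) iemExampleEflBitsDiffer(uint32_t fEfl, unsigned iBitNo1, unsigned iBitNo2) /* hypothetical helper name */
{
    uint32_t uTmp = fEfl & RT_BIT_32(iBitNo1);      /* isolate bit #1 */
    uTmp = iBitNo1 > iBitNo2                        /* align it with bit #2 */
         ? uTmp >> (iBitNo1 - iBitNo2)
         : uTmp << (iBitNo2 - iBitNo1);
    uTmp ^= fEfl;                                   /* bit #2 is now set iff the two bits differ */
    return RT_BOOL(uTmp & RT_BIT_32(iBitNo2));
}
#endif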
1660
1661
1662#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
1663 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
1664 do {
1665
1666#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
1667 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
1668 do {
1669
1670/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
1671 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
1672DECL_INLINE_THROW(uint32_t)
1673iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
1674 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1675{
1676 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
1677 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1678
1679 /* We need an if-block label for the inverted variant. */
1680 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
1681 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
1682
1683 /* Get the eflags. */
1684 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1685 kIemNativeGstRegUse_ReadOnly);
1686
1687 /* Translate the flag masks to bit numbers. */
1688 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1689 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1690
1691 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1692 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1693 Assert(iBitNo1 != iBitNo);
1694
1695 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1696 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1697 Assert(iBitNo2 != iBitNo);
1698 Assert(iBitNo2 != iBitNo1);
1699
1700#ifdef RT_ARCH_AMD64
1701 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
1702#elif defined(RT_ARCH_ARM64)
1703 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1704#endif
1705
1706 /* Check for the lone bit first. */
1707 if (!fInverted)
1708 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1709 else
1710 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
1711
1712 /* Then extract and compare the other two bits. */
1713#ifdef RT_ARCH_AMD64
1714 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1715 if (iBitNo1 > iBitNo2)
1716 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1717 else
1718 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1719 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1720
1721#elif defined(RT_ARCH_ARM64)
1722 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1723
1724 /* and tmpreg, eflreg, #1<<iBitNo1 */
1725 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1726
1727 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1728 if (iBitNo1 > iBitNo2)
1729 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1730 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1731 else
1732 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1733 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1734
1735 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1736
1737#else
1738# error "Port me"
1739#endif
1740
1741 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1742 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1743 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1744
1745 /* Free but don't flush the EFlags and tmp registers. */
1746 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1747 iemNativeRegFreeTmp(pReNative, idxEflReg);
1748
1749 /* Make a copy of the core state now as we start the if-block. */
1750 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
1751
1752 return off;
1753}
1754
1755
1756#define IEM_MC_IF_CX_IS_NZ() \
1757 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
1758 do {
1759
1760/** Emits code for IEM_MC_IF_CX_IS_NZ. */
1761DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1762{
1763 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1764
1765 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1766 kIemNativeGstRegUse_ReadOnly);
1767 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
1768 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1769
1770 iemNativeCondStartIfBlock(pReNative, off);
1771 return off;
1772}
1773
1774
1775#define IEM_MC_IF_ECX_IS_NZ() \
1776 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
1777 do {
1778
1779#define IEM_MC_IF_RCX_IS_NZ() \
1780 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
1781 do {
1782
1783/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
1784DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1785{
1786 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1787
1788 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1789 kIemNativeGstRegUse_ReadOnly);
1790 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
1791 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1792
1793 iemNativeCondStartIfBlock(pReNative, off);
1794 return off;
1795}
1796
1797
1798#define IEM_MC_IF_CX_IS_NOT_ONE() \
1799 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
1800 do {
1801
1802/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
1803DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1804{
1805 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1806
1807 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1808 kIemNativeGstRegUse_ReadOnly);
1809#ifdef RT_ARCH_AMD64
1810 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1811#else
1812 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1813 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1814 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1815#endif
1816 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1817
1818 iemNativeCondStartIfBlock(pReNative, off);
1819 return off;
1820}
1821
1822
1823#define IEM_MC_IF_ECX_IS_NOT_ONE() \
1824 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
1825 do {
1826
1827#define IEM_MC_IF_RCX_IS_NOT_ONE() \
1828 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
1829 do {
1830
1831/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
1832DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1833{
1834 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1835
1836 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1837 kIemNativeGstRegUse_ReadOnly);
1838 if (f64Bit)
1839 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1840 else
1841 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1842 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1843
1844 iemNativeCondStartIfBlock(pReNative, off);
1845 return off;
1846}
1847
1848
1849#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1850 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
1851 do {
1852
1853#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1854 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
1855 do {
1856
1857/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
1858 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
1859DECL_INLINE_THROW(uint32_t)
1860iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
1861{
1862 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1863 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1864
1865 /* We have to load both RCX and EFLAGS before we can start branching,
1866 otherwise we'll end up in the else-block with an inconsistent
1867 register allocator state.
1868 Doing EFLAGS first as it's more likely to be loaded, right? */
1869 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1870 kIemNativeGstRegUse_ReadOnly);
1871 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1872 kIemNativeGstRegUse_ReadOnly);
1873
1874 /** @todo we could reduce this to a single branch instruction by spending a
1875 * temporary register and some setnz stuff. Not sure if loops are
1876 * worth it. */
1877 /* Check CX. */
1878#ifdef RT_ARCH_AMD64
1879 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1880#else
1881 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1882 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1883 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1884#endif
1885
1886 /* Check the EFlags bit. */
1887 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1888 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1889 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
1890 !fCheckIfSet /*fJmpIfSet*/);
1891
1892 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1893 iemNativeRegFreeTmp(pReNative, idxEflReg);
1894
1895 iemNativeCondStartIfBlock(pReNative, off);
1896 return off;
1897}
1898
1899
1900#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1901 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
1902 do {
1903
1904#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1905 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
1906 do {
1907
1908#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1909 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
1910 do {
1911
1912#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1913 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
1914 do {
1915
1916/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
1917 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
1918 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
1919 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
1920DECL_INLINE_THROW(uint32_t)
1921iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1922 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
1923{
1924 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1925 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1926
1927 /* We have to load both RCX and EFLAGS before we can start branching,
1928 otherwise we'll end up in the else-block with an inconsistent
1929 register allocator state.
1930 Doing EFLAGS first as it's more likely to be loaded, right? */
1931 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1932 kIemNativeGstRegUse_ReadOnly);
1933 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1934 kIemNativeGstRegUse_ReadOnly);
1935
1936 /** @todo we could reduce this to a single branch instruction by spending a
1937 * temporary register and some setnz stuff. Not sure if loops are
1938 * worth it. */
1939 /* Check RCX/ECX. */
1940 if (f64Bit)
1941 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1942 else
1943 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1944
1945 /* Check the EFlags bit. */
1946 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1947 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1948 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
1949 !fCheckIfSet /*fJmpIfSet*/);
1950
1951 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1952 iemNativeRegFreeTmp(pReNative, idxEflReg);
1953
1954 iemNativeCondStartIfBlock(pReNative, off);
1955 return off;
1956}
1957
1958
1959#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
1960 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
1961 do {
1962
1963/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
1964DECL_INLINE_THROW(uint32_t)
1965iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
1966{
1967 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1968
1969 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
1970 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
1971 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
1972 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
1973
1974 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
1975
1976 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
1977
1978 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
1979
1980 iemNativeCondStartIfBlock(pReNative, off);
1981 return off;
1982}
1983
1984
1985#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
1986 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
1987 do {
1988
1989/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
1990DECL_INLINE_THROW(uint32_t)
1991iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
1992{
1993 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1994 Assert(iGReg < 16);
1995
1996 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
1997 kIemNativeGstRegUse_ReadOnly);
1998
1999 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
2000
2001 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2002
2003 iemNativeCondStartIfBlock(pReNative, off);
2004 return off;
2005}
2006
2007
2008#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2009
2010#define IEM_MC_IF_MXCSR_XCPT_PENDING() \
2011 off = iemNativeEmitIfMxcsrXcptPending(pReNative, off); \
2012 do {
2013
2014/** Emits code for IEM_MC_IF_MXCSR_XCPT_PENDING. */
2015DECL_INLINE_THROW(uint32_t)
2016iemNativeEmitIfMxcsrXcptPending(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2017{
2018 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2019
2020 uint8_t const idxGstMxcsrReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
2021 kIemNativeGstRegUse_Calculation);
2022 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
2023
2024 /* mov tmp0, mxcsr */
2025 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp, idxGstMxcsrReg);
2026 /* tmp0 &= X86_MXCSR_XCPT_FLAGS */
2027 off = iemNativeEmitAndGprByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS);
2028 /* mxcsr &= X86_MXCSR_XCPT_MASK */
2029 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstMxcsrReg, X86_MXCSR_XCPT_MASK);
2030 /* mxcsr = ~mxcsr */
2031 off = iemNativeEmitInvBitsGpr(pReNative, off, idxGstMxcsrReg, idxGstMxcsrReg);
2032 /* mxcsr >>= X86_MXCSR_XCPT_MASK_SHIFT */
2033 off = iemNativeEmitShiftGprRight(pReNative, off, idxGstMxcsrReg, X86_MXCSR_XCPT_MASK_SHIFT);
2034 /* tmp0 &= mxcsr */
2035 off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegTmp, idxGstMxcsrReg);
2036
2037 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxRegTmp, true /*f64Bit*/, pEntry->idxLabelElse);
2038 iemNativeRegFreeTmp(pReNative, idxGstMxcsrReg);
2039 iemNativeRegFreeTmp(pReNative, idxRegTmp);
2040
2041 iemNativeCondStartIfBlock(pReNative, off);
2042 return off;
2043}
2044
2045#endif
2046
2047
2048/*********************************************************************************************************************************
2049* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
2050*********************************************************************************************************************************/
2051
2052#define IEM_MC_NOREF(a_Name) \
2053 RT_NOREF_PV(a_Name)
2054
2055#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
2056 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
2057
2058#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
2059 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
2060
2061#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
2062 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
2063
2064#define IEM_MC_LOCAL(a_Type, a_Name) \
2065 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
2066
2067#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
2068 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
2069
2070#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
2071 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
2072
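/*
 * Typical use of the argument/local wrappers above inside a recompiled MC
 * block, for orientation (sketch only; iemAImpl_example_u32 is a made-up
 * assembly helper name):
 */
#if 0 /* illustration only, not built */
    IEM_MC_ARG(uint32_t *,  pu32Dst, 0);
    IEM_MC_ARG(uint32_t,    u32Src,  1);
    IEM_MC_LOCAL(uint32_t,  u32Tmp);
    IEM_MC_CALL_VOID_AIMPL_2(iemAImpl_example_u32, pu32Dst, u32Src);
    IEM_MC_FREE_LOCAL(u32Tmp);
#endif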
2073
2074/**
2075 * Sets the host register for @a idxVar to @a idxReg.
2076 *
2077 * The register must not be allocated. Any guest register shadowing will be
2078 * implicitly dropped by this call.
2079 *
2080 * The variable must not have any register associated with it (causes
2081 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
2082 * implied.
2083 *
2084 * @returns idxReg
2085 * @param pReNative The recompiler state.
2086 * @param idxVar The variable.
2087 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
2088 * @param off For recording in debug info.
2089 *
2090 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
2091 */
2092DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
2093{
2094 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2095 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
2096 Assert(!pVar->fRegAcquired);
2097 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2098 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
2099 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
2100
2101 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
2102 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
2103
2104 iemNativeVarSetKindToStack(pReNative, idxVar);
2105 pVar->idxReg = idxReg;
2106
2107 return idxReg;
2108}
2109
2110
2111/**
2112 * A convenient helper function.
2113 */
2114DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
2115 uint8_t idxReg, uint32_t *poff)
2116{
2117 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
2118 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
2119 return idxReg;
2120}
2121
2122
2123/**
2124 * This is called by IEM_MC_END() to clean up all variables.
2125 */
2126DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
2127{
2128 uint32_t const bmVars = pReNative->Core.bmVars;
2129 if (bmVars != 0)
2130 iemNativeVarFreeAllSlow(pReNative, bmVars);
2131 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
2132 Assert(pReNative->Core.bmStack == 0);
2133}
2134
2135
2136#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
2137
2138/**
2139 * This is called by IEM_MC_FREE_LOCAL.
2140 */
2141DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2142{
2143 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2144 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
2145 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2146}
2147
2148
2149#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
2150
2151/**
2152 * This is called by IEM_MC_FREE_ARG.
2153 */
2154DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2155{
2156 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2157 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
2158 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2159}
2160
2161
2162#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
2163
2164/**
2165 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
2166 */
2167DECL_INLINE_THROW(uint32_t)
2168iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
2169{
2170 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
2171 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
2172 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2173 Assert( pVarDst->cbVar == sizeof(uint16_t)
2174 || pVarDst->cbVar == sizeof(uint32_t));
2175
2176 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
2177 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
2178 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
2179 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
2180 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2181
2182 Assert(pVarDst->cbVar < pVarSrc->cbVar);
2183
2184 /*
2185 * Special case for immediates.
2186 */
2187 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
2188 {
2189 switch (pVarDst->cbVar)
2190 {
2191 case sizeof(uint16_t):
2192 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
2193 break;
2194 case sizeof(uint32_t):
2195 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
2196 break;
2197 default: AssertFailed(); break;
2198 }
2199 }
2200 else
2201 {
2202 /*
2203 * The generic solution for now.
2204 */
2205 /** @todo optimize this by having the python script make sure the source
2206 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
2207 * statement. Then we could just transfer the register assignments. */
2208 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
2209 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
2210 switch (pVarDst->cbVar)
2211 {
2212 case sizeof(uint16_t):
2213 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
2214 break;
2215 case sizeof(uint32_t):
2216 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
2217 break;
2218 default: AssertFailed(); break;
2219 }
2220 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
2221 iemNativeVarRegisterRelease(pReNative, idxVarDst);
2222 }
2223 return off;
2224}
2225
2226
2227
2228/*********************************************************************************************************************************
2229* Emitters for IEM_MC_CALL_CIMPL_XXX *
2230*********************************************************************************************************************************/
2231
2232/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
2233DECL_INLINE_THROW(uint32_t)
2234iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
2235 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
2236
2237{
2238 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
2239
2240#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2241 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
2242 when a call clobbers any of the relevant control registers. */
2243# if 1
2244 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
2245 {
2246 /* Likely as long as call+ret are done via cimpl. */
2247 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
2248 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
2249 }
2250 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
2251 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2252 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
2253 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2254 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
2255 else
2256 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2257 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2258 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2259
2260# else
2261 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
2262 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2263 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
2264 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2265 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
2266 || pfnCImpl == (uintptr_t)iemCImpl_callf
2267 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
2268 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
2269 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2270 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2271 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2272# endif
2273#endif
2274
2275 /*
2276 * Do all the call setup and cleanup.
2277 */
2278 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
2279
2280 /*
2281 * Load the two or three hidden arguments.
2282 */
2283#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2284 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
2285 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2286 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
2287#else
2288 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2289 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
2290#endif
2291
2292 /*
2293 * Make the call and check the return code.
2294 *
2295 * Shadow PC copies are always flushed here, other stuff depends on flags.
2296 * Segment and general purpose registers are explicitly flushed via the
2297 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
2298 * macros.
2299 */
2300 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
2301#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2302 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
2303#endif
2304 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
2305 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
2306 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
2307 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
2308
2309 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
2310}
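
/*
 * For reference, the C-implementation helpers called here have roughly the
 * shape below, pVCpu and cbInstr being the hidden arguments loaded above
 * (sketch only; the real prototypes come from the IEM_CIMPL_DEF_N macros and
 * iemCImpl_Example is a made-up name):
 */
#if 0 /* illustration only, not built */
VBOXSTRICTRC iemCImpl_Example(PVMCPUCC pVCpu, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1);
#endif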
2311
2312
2313#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
2314 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
2315
2316/** Emits code for IEM_MC_CALL_CIMPL_1. */
2317DECL_INLINE_THROW(uint32_t)
2318iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2319 uintptr_t pfnCImpl, uint8_t idxArg0)
2320{
2321 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2322 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
2323}
2324
2325
2326#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
2327 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
2328
2329/** Emits code for IEM_MC_CALL_CIMPL_2. */
2330DECL_INLINE_THROW(uint32_t)
2331iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2332 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
2333{
2334 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2335 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2336 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
2337}
2338
2339
2340#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
2341 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2342 (uintptr_t)a_pfnCImpl, a0, a1, a2)
2343
2344/** Emits code for IEM_MC_CALL_CIMPL_3. */
2345DECL_INLINE_THROW(uint32_t)
2346iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2347 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2348{
2349 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2350 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2351 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2352 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
2353}
2354
2355
2356#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
2357 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2358 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
2359
2360/** Emits code for IEM_MC_CALL_CIMPL_4. */
2361DECL_INLINE_THROW(uint32_t)
2362iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2363 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2364{
2365 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2366 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2367 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2368 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2369 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
2370}
2371
2372
2373#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
2374 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2375 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
2376
2377/** Emits code for IEM_MC_CALL_CIMPL_5. */
2378DECL_INLINE_THROW(uint32_t)
2379iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2380 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
2381{
2382 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2383 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2384 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2385 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2386 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
2387 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
2388}
2389
2390
2391/** Recompiler debugging: Flush guest register shadow copies. */
2392#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
2393
2394
2395
2396/*********************************************************************************************************************************
2397* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
2398*********************************************************************************************************************************/
2399
2400/**
2401 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
2402 */
2403DECL_INLINE_THROW(uint32_t)
2404iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2405 uintptr_t pfnAImpl, uint8_t cArgs)
2406{
2407 if (idxVarRc != UINT8_MAX)
2408 {
2409 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
2410 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
2411 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
2412 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
2413 }
2414
2415 /*
2416 * Do all the call setup and cleanup.
2417 */
2418 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
2419
2420 /*
2421 * Make the call and update the return code variable if we've got one.
2422 */
2423 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
2424 if (idxVarRc != UINT8_MAX)
2425 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
2426
2427 return off;
2428}
2429
2430
2431
2432#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
2433 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
2434
2435#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
2436 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
2437
2438/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
2439DECL_INLINE_THROW(uint32_t)
2440iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
2441{
2442 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
2443}
2444
2445
2446#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
2447 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
2448
2449#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
2450 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
2451
2452/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
2453DECL_INLINE_THROW(uint32_t)
2454iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
2455{
2456 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2457 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
2458}
2459
2460
2461#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
2462 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
2463
2464#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
2465 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
2466
2467/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
2468DECL_INLINE_THROW(uint32_t)
2469iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2470 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
2471{
2472 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2473 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2474 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
2475}
2476
2477
2478#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
2479 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
2480
2481#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
2482 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
2483
2484/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
2485DECL_INLINE_THROW(uint32_t)
2486iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2487 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2488{
2489 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2490 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2491 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2492 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
2493}
2494
2495
2496#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
2497 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2498
2499#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
2500 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2501
2502/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
2503DECL_INLINE_THROW(uint32_t)
2504iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2505 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2506{
2507 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2508 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2509 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2510 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
2511 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
2512}
2513
2514
2515
2516/*********************************************************************************************************************************
2517* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
2518*********************************************************************************************************************************/
2519
2520#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
2521 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
2522
2523#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2524 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
2525
2526#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2527 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
2528
2529#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2530 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
2531
2532
2533/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
2534 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
2535DECL_INLINE_THROW(uint32_t)
2536iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
2537{
2538 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2539 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2540 Assert(iGRegEx < 20);
2541
2542 /* Same discussion as in iemNativeEmitFetchGregU16 */
2543 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2544 kIemNativeGstRegUse_ReadOnly);
2545
2546 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2547 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2548
2549 /* The value is zero-extended to the full 64-bit host register width. */
2550 if (iGRegEx < 16)
2551 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2552 else
2553 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2554
2555 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2556 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2557 return off;
2558}
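/*
 * Note (sketch only): judging by the iGRegEx < 20 assertion and the & 15
 * masking above, the threaded generator encodes the legacy high byte
 * registers AH, CH, DH and BH as iGRegEx values 16..19, and that is what
 * the branch above keys on.  For a hypothetical 'mov al, ch' style fetch:
 *
 *      iGRegEx = 17 (CH)  ->  full guest RCX is shadowed read-only and
 *                             iemNativeEmitLoadGprFromGpr8Hi() zero-extends
 *                             bits 15:8 into the variable's host register;
 *      iGRegEx = 1  (CL)  ->  iemNativeEmitLoadGprFromGpr8() zero-extends
 *                             bits 7:0 instead.
 */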
2559
2560
2561#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2562 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
2563
2564#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2565 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
2566
2567#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2568 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
2569
2570/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
2571DECL_INLINE_THROW(uint32_t)
2572iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
2573{
2574 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2575 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2576 Assert(iGRegEx < 20);
2577
2578 /* Same discussion as in iemNativeEmitFetchGregU16 */
2579 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2580 kIemNativeGstRegUse_ReadOnly);
2581
2582 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2583 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2584
2585 if (iGRegEx < 16)
2586 {
2587 switch (cbSignExtended)
2588 {
2589 case sizeof(uint16_t):
2590 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2591 break;
2592 case sizeof(uint32_t):
2593 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2594 break;
2595 case sizeof(uint64_t):
2596 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2597 break;
2598 default: AssertFailed(); break;
2599 }
2600 }
2601 else
2602 {
2603 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2604 switch (cbSignExtended)
2605 {
2606 case sizeof(uint16_t):
2607 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2608 break;
2609 case sizeof(uint32_t):
2610 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2611 break;
2612 case sizeof(uint64_t):
2613 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2614 break;
2615 default: AssertFailed(); break;
2616 }
2617 }
2618
2619 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2620 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2621 return off;
2622}
2623
2624
2625
2626#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
2627 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
2628
2629#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
2630 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2631
2632#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
2633 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2634
2635/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
2636DECL_INLINE_THROW(uint32_t)
2637iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2638{
2639 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2640 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2641 Assert(iGReg < 16);
2642
2643 /*
2644 * We can either just load the low 16 bits of the GPR into a host register
2645 * for the variable, or we can do so via a shadow copy host register. The
2646 * latter will avoid having to reload it if it's being stored later, but
2647 * will waste a host register if it isn't touched again. Since we don't
2648 * know what's going to happen, we choose the latter for now (see the sketch after this function).
2649 */
2650 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2651 kIemNativeGstRegUse_ReadOnly);
2652
2653 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2654 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2655 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2656 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2657
2658 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2659 return off;
2660}
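/*
 * Sketch of the trade-off discussed above, for a hypothetical MC block
 * fragment:
 *
 *      IEM_MC_LOCAL(uint16_t, u16Tmp);
 *      IEM_MC_FETCH_GREG_U16(u16Tmp, X86_GREG_xCX);
 *
 * The "direct" alternative would be a single 16-bit zero-extending load of
 * cpum.GstCtx.aGRegs[X86_GREG_xCX] into u16Tmp's host register.  The shadow
 * copy approach used here instead binds a host register to the full guest
 * RCX value first (so later statements touching RCX can reuse it) and then
 * copies its low 16 bits into the variable register.
 */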
2661
2662
2663#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
2664 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2665
2666#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
2667 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2668
2669/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
2670DECL_INLINE_THROW(uint32_t)
2671iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
2672{
2673 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2674 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2675 Assert(iGReg < 16);
2676
2677 /*
2678 * We can either just load the low 16 bits of the GPR into a host register
2679 * for the variable, or we can do so via a shadow copy host register. The
2680 * latter will avoid having to reload it if it's being stored later, but
2681 * will waste a host register if it isn't touched again. Since we don't
2682 * know what's going to happen, we choose the latter for now.
2683 */
2684 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2685 kIemNativeGstRegUse_ReadOnly);
2686
2687 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2688 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2689 if (cbSignExtended == sizeof(uint32_t))
2690 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2691 else
2692 {
2693 Assert(cbSignExtended == sizeof(uint64_t));
2694 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2695 }
2696 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2697
2698 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2699 return off;
2700}
2701
2702
2703#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
2704 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
2705
2706#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
2707 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
2708
2709/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
2710DECL_INLINE_THROW(uint32_t)
2711iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2712{
2713 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2714 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2715 Assert(iGReg < 16);
2716
2717 /*
2718 * We can either just load the low 32 bits of the GPR into a host register
2719 * for the variable, or we can do so via a shadow copy host register. The
2720 * latter will avoid having to reload it if it's being stored later, but
2721 * will waste a host register if it isn't touched again. Since we don't
2722 * know what's going to happen, we choose the latter for now.
2723 */
2724 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2725 kIemNativeGstRegUse_ReadOnly);
2726
2727 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2728 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2729 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2730 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2731
2732 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2733 return off;
2734}
2735
2736
2737#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
2738 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
2739
2740/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
2741DECL_INLINE_THROW(uint32_t)
2742iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2743{
2744 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2745 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2746 Assert(iGReg < 16);
2747
2748 /*
2749 * We can either just load the low 32 bits of the GPR into a host register
2750 * for the variable, or we can do so via a shadow copy host register. The
2751 * latter will avoid having to reload it if it's being stored later, but
2752 * will waste a host register if it isn't touched again. Since we don't
2753 * know what's going to happen, we choose the latter for now.
2754 */
2755 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2756 kIemNativeGstRegUse_ReadOnly);
2757
2758 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2759 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2760 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2761 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2762
2763 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2764 return off;
2765}
2766
2767
2768#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
2769 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2770
2771#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
2772 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2773
2774/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
2775 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
2776DECL_INLINE_THROW(uint32_t)
2777iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2778{
2779 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2780 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2781 Assert(iGReg < 16);
2782
2783 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2784 kIemNativeGstRegUse_ReadOnly);
2785
2786 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2787 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2788 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
2789 /** @todo name the register a shadow one already? */
2790 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2791
2792 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2793 return off;
2794}
2795
2796
2797
2798/*********************************************************************************************************************************
2799* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
2800*********************************************************************************************************************************/
2801
2802#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
2803 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
2804
2805/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
2806DECL_INLINE_THROW(uint32_t)
2807iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
2808{
2809 Assert(iGRegEx < 20);
2810 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2811 kIemNativeGstRegUse_ForUpdate);
2812#ifdef RT_ARCH_AMD64
2813 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2814
2815 /* To the lowest byte of the register: mov r8, imm8 */
2816 if (iGRegEx < 16)
2817 {
2818 if (idxGstTmpReg >= 8)
2819 pbCodeBuf[off++] = X86_OP_REX_B;
2820 else if (idxGstTmpReg >= 4)
2821 pbCodeBuf[off++] = X86_OP_REX;
2822 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2823 pbCodeBuf[off++] = u8Value;
2824 }
2825 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate, store and rotate back. */
2826 else if (idxGstTmpReg < 4)
2827 {
2828 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
2829 pbCodeBuf[off++] = u8Value;
2830 }
2831 else
2832 {
2833 /* ror reg64, 8 */
2834 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2835 pbCodeBuf[off++] = 0xc1;
2836 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
2837 pbCodeBuf[off++] = 8;
2838
2839 /* mov reg8, imm8 */
2840 if (idxGstTmpReg >= 8)
2841 pbCodeBuf[off++] = X86_OP_REX_B;
2842 else if (idxGstTmpReg >= 4)
2843 pbCodeBuf[off++] = X86_OP_REX;
2844 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2845 pbCodeBuf[off++] = u8Value;
2846
2847 /* rol reg64, 8 */
2848 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2849 pbCodeBuf[off++] = 0xc1;
2850 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
2851 pbCodeBuf[off++] = 8;
2852 }
2853
2854#elif defined(RT_ARCH_ARM64)
2855 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
2856 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2857 if (iGRegEx < 16)
2858 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
2859 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
2860 else
2861 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
2862 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
2863 iemNativeRegFreeTmp(pReNative, idxImmReg);
2864
2865#else
2866# error "Port me!"
2867#endif
2868
2869 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2870
2871#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
2872 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
2873#endif
2874
2875 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2876 return off;
2877}
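/*
 * Illustrative sketch of the AMD64 rotate trick above, assuming the guest
 * register got allocated to host register r10 (i.e. no legacy high byte
 * encoding available) and the store targets one of AH, CH, DH or BH:
 *
 *      ror r10, 8          ; bits 15:8 move down to bits 7:0
 *      mov r10b, imm8      ; patch what is now the low byte
 *      rol r10, 8          ; rotate the register back into place
 *
 * Since the rotation covers the full 64-bit register, bits 63:16 survive
 * unchanged as well.
 */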
2878
2879
2880#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
2881 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
2882
2883/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
2884DECL_INLINE_THROW(uint32_t)
2885iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
2886{
2887 Assert(iGRegEx < 20);
2888 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
2889
2890 /*
2891 * If it's a constant value (unlikely) we treat this as an
2892 * IEM_MC_STORE_GREG_U8_CONST statement.
2893 */
2894 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
2895 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
2896 { /* likely */ }
2897 else
2898 {
2899 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
2900 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2901 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
2902 }
2903
2904 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2905 kIemNativeGstRegUse_ForUpdate);
2906 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
2907
2908#ifdef RT_ARCH_AMD64
2909 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
2910 if (iGRegEx < 16)
2911 {
2912 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
2913 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
2914 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
2915 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
2916 pbCodeBuf[off++] = X86_OP_REX;
2917 pbCodeBuf[off++] = 0x8a;
2918 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
2919 }
2920 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate, store and rotate back. */
2921 else if (idxGstTmpReg < 4 && idxVarReg < 4)
2922 {
2923 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
2924 pbCodeBuf[off++] = 0x8a;
2925 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
2926 }
2927 else
2928 {
2929 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
2930
2931 /* ror reg64, 8 */
2932 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2933 pbCodeBuf[off++] = 0xc1;
2934 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
2935 pbCodeBuf[off++] = 8;
2936
2937 /* mov reg8, reg8(r/m) */
2938 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
2939 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
2940 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
2941 pbCodeBuf[off++] = X86_OP_REX;
2942 pbCodeBuf[off++] = 0x8a;
2943 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
2944
2945 /* rol reg64, 8 */
2946 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2947 pbCodeBuf[off++] = 0xc1;
2948 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
2949 pbCodeBuf[off++] = 8;
2950 }
2951
2952#elif defined(RT_ARCH_ARM64)
2953 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
2954 or
2955 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
2956 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2957 if (iGRegEx < 16)
2958 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
2959 else
2960 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
2961
2962#else
2963# error "Port me!"
2964#endif
2965 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2966
2967 iemNativeVarRegisterRelease(pReNative, idxValueVar);
2968
2969#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
2970 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
2971#endif
2972 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2973 return off;
2974}
2975
2976
2977
2978#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
2979 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
2980
2981/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
2982DECL_INLINE_THROW(uint32_t)
2983iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
2984{
2985 Assert(iGReg < 16);
2986 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2987 kIemNativeGstRegUse_ForUpdate);
2988#ifdef RT_ARCH_AMD64
2989 /* mov reg16, imm16 */
2990 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
2991 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2992 if (idxGstTmpReg >= 8)
2993 pbCodeBuf[off++] = X86_OP_REX_B;
2994 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
2995 pbCodeBuf[off++] = RT_BYTE1(uValue);
2996 pbCodeBuf[off++] = RT_BYTE2(uValue);
2997
2998#elif defined(RT_ARCH_ARM64)
2999 /* movk xdst, #uValue, lsl #0 */
3000 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3001 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
3002
3003#else
3004# error "Port me!"
3005#endif
3006
3007 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3008
3009#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3010 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3011#endif
3012 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3013 return off;
3014}
3015
3016
3017#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
3018 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
3019
3020/** Emits code for IEM_MC_STORE_GREG_U16. */
3021DECL_INLINE_THROW(uint32_t)
3022iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3023{
3024 Assert(iGReg < 16);
3025 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3026
3027 /*
3028 * If it's a constant value (unlikely) we treat this as an
3029 * IEM_MC_STORE_GREG_U16_CONST statement.
3030 */
3031 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3032 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3033 { /* likely */ }
3034 else
3035 {
3036 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3037 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3038 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
3039 }
3040
3041 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3042 kIemNativeGstRegUse_ForUpdate);
3043
3044#ifdef RT_ARCH_AMD64
3045 /* mov reg16, reg16 or [mem16] */
3046 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
3047 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3048 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
3049 {
3050 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
3051 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
3052 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
3053 pbCodeBuf[off++] = 0x8b;
3054 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
3055 }
3056 else
3057 {
3058 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
3059 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
3060 if (idxGstTmpReg >= 8)
3061 pbCodeBuf[off++] = X86_OP_REX_R;
3062 pbCodeBuf[off++] = 0x8b;
3063 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
3064 }
3065
3066#elif defined(RT_ARCH_ARM64)
3067 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
3068 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
3069 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3070 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
3071 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3072
3073#else
3074# error "Port me!"
3075#endif
3076
3077 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3078
3079#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3080 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3081#endif
3082 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3083 return off;
3084}
3085
3086
3087#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
3088 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
3089
3090/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
3091DECL_INLINE_THROW(uint32_t)
3092iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
3093{
3094 Assert(iGReg < 16);
3095 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3096 kIemNativeGstRegUse_ForFullWrite);
3097 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
3098#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3099 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3100#endif
3101 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3102 return off;
3103}
3104
3105
3106#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
3107 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
3108
3109/** Emits code for IEM_MC_STORE_GREG_U32. */
3110DECL_INLINE_THROW(uint32_t)
3111iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3112{
3113 Assert(iGReg < 16);
3114 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3115
3116 /*
3117 * If it's a constant value (unlikely) we treat this as an
3118 * IEM_MC_STORE_GREG_U32_CONST statement.
3119 */
3120 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3121 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3122 { /* likely */ }
3123 else
3124 {
3125 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3126 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3127 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
3128 }
3129
3130 /*
3131 * For the rest we allocate a guest register for the variable and write
3132 * it to the CPUMCTX structure.
3133 */
3134 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3135#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3136 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3137#else
3138 RT_NOREF(idxVarReg);
3139#endif
3140#ifdef VBOX_STRICT
3141 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
3142#endif
3143 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3144 return off;
3145}
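/*
 * Note: a 32-bit GPR store architecturally clears bits 63:32 of the guest
 * register, which is why the variable's host register can simply be bound
 * as the full-width shadow of the guest register here and, in strict
 * builds, is checked to have a clear upper half.  Rough example of the kind
 * of statement this backs (guest register choice is hypothetical):
 *
 *      IEM_MC_STORE_GREG_U32(X86_GREG_xCX, u32Value)
 *          -> the host register holding u32Value becomes the shadow of
 *             guest RCX and is flushed to cpum.GstCtx.aGRegs[X86_GREG_xCX]
 *             either right away or via the delayed writeback machinery.
 */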
3146
3147
3148#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
3149 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
3150
3151/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
3152DECL_INLINE_THROW(uint32_t)
3153iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
3154{
3155 Assert(iGReg < 16);
3156 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3157 kIemNativeGstRegUse_ForFullWrite);
3158 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
3159#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3160 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3161#endif
3162 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3163 return off;
3164}
3165
3166
3167#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
3168 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
3169
3170#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
3171 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
3172
3173/** Emits code for IEM_MC_STORE_GREG_U64. */
3174DECL_INLINE_THROW(uint32_t)
3175iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3176{
3177 Assert(iGReg < 16);
3178 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3179
3180 /*
3181 * If it's a constant value (unlikely) we treat this as an
3182 * IEM_MC_STORE_GREG_U64_CONST statement.
3183 */
3184 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3185 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3186 { /* likely */ }
3187 else
3188 {
3189 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3190 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3191 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
3192 }
3193
3194 /*
3195 * For the rest we allocate a guest register for the variable and write
3196 * it to the CPUMCTX structure.
3197 */
3198 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3199#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3200 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3201#else
3202 RT_NOREF(idxVarReg);
3203#endif
3204 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3205 return off;
3206}
3207
3208
3209#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
3210 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
3211
3212/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
3213DECL_INLINE_THROW(uint32_t)
3214iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
3215{
3216 Assert(iGReg < 16);
3217 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3218 kIemNativeGstRegUse_ForUpdate);
3219 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
3220#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3221 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3222#endif
3223 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3224 return off;
3225}
3226
3227
3228/*********************************************************************************************************************************
3229* General purpose register manipulation (add, sub). *
3230*********************************************************************************************************************************/
3231
3232#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8AddendConst) \
3233 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8AddendConst)
3234
3235/** Emits code for IEM_MC_ADD_GREG_U16. */
3236DECL_INLINE_THROW(uint32_t)
3237iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
3238{
3239 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3240 kIemNativeGstRegUse_ForUpdate);
3241
3242#ifdef RT_ARCH_AMD64
3243 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3244 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3245 if (idxGstTmpReg >= 8)
3246 pbCodeBuf[off++] = X86_OP_REX_B;
3247 if (uAddend == 1)
3248 {
3249 pbCodeBuf[off++] = 0xff; /* inc */
3250 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3251 }
3252 else
3253 {
3254 pbCodeBuf[off++] = 0x81;
3255 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3256 pbCodeBuf[off++] = uAddend;
3257 pbCodeBuf[off++] = 0;
3258 }
3259
3260#else
3261 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3262 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3263
3264 /* add tmp, gstgrp, uAddend */
3265 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
3266
3267 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
3268 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3269
3270 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3271#endif
3272
3273 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3274
3275#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3276 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3277#endif
3278
3279 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3280 return off;
3281}
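/*
 * Sketch of the two strategies above for adding a small constant to a
 * 16-bit guest register without disturbing bits 63:16 of its shadow (host
 * register numbers are made up):
 *
 *      AMD64:  inc r8w                   ; addend == 1, or:
 *              add r8w, imm16            ; 66 81 /0 iw - the 16-bit operand
 *                                        ; size leaves bits 63:16 untouched
 *      ARM64:  add w9, w8, #imm          ; compute into a temporary
 *              bfi w8, w9, #0, #16       ; insert bits 15:0, keep the rest
 */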
3282
3283
3284#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
3285 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3286
3287#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
3288 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3289
3290/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
3291DECL_INLINE_THROW(uint32_t)
3292iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
3293{
3294 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3295 kIemNativeGstRegUse_ForUpdate);
3296
3297#ifdef RT_ARCH_AMD64
3298 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3299 if (f64Bit)
3300 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3301 else if (idxGstTmpReg >= 8)
3302 pbCodeBuf[off++] = X86_OP_REX_B;
3303 if (uAddend == 1)
3304 {
3305 pbCodeBuf[off++] = 0xff; /* inc */
3306 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3307 }
3308 else if (uAddend < 128)
3309 {
3310 pbCodeBuf[off++] = 0x83; /* add */
3311 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3312 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3313 }
3314 else
3315 {
3316 pbCodeBuf[off++] = 0x81; /* add */
3317 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3318 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3319 pbCodeBuf[off++] = 0;
3320 pbCodeBuf[off++] = 0;
3321 pbCodeBuf[off++] = 0;
3322 }
3323
3324#else
3325 /* add gstgrp, gstgrp, uAddend */
3326 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3327 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
3328
3329#endif
3330
3331 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3332
3333#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3334 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3335#endif
3336
3337 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3338 return off;
3339}
3340
3341
3342
3343#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
3344 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
3345
3346/** Emits code for IEM_MC_SUB_GREG_U16. */
3347DECL_INLINE_THROW(uint32_t)
3348iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
3349{
3350 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3351 kIemNativeGstRegUse_ForUpdate);
3352
3353#ifdef RT_ARCH_AMD64
3354 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3355 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3356 if (idxGstTmpReg >= 8)
3357 pbCodeBuf[off++] = X86_OP_REX_B;
3358 if (uSubtrahend == 1)
3359 {
3360 pbCodeBuf[off++] = 0xff; /* dec */
3361 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3362 }
3363 else
3364 {
3365 pbCodeBuf[off++] = 0x81;
3366 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3367 pbCodeBuf[off++] = uSubtrahend;
3368 pbCodeBuf[off++] = 0;
3369 }
3370
3371#else
3372 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3373 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3374
3375 /* sub tmp, gstgrp, uSubtrahend */
3376 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
3377
3378 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
3379 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3380
3381 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3382#endif
3383
3384 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3385
3386#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3387 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3388#endif
3389
3390 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3391 return off;
3392}
3393
3394
3395#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
3396 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3397
3398#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
3399 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3400
3401/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
3402DECL_INLINE_THROW(uint32_t)
3403iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
3404{
3405 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3406 kIemNativeGstRegUse_ForUpdate);
3407
3408#ifdef RT_ARCH_AMD64
3409 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3410 if (f64Bit)
3411 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3412 else if (idxGstTmpReg >= 8)
3413 pbCodeBuf[off++] = X86_OP_REX_B;
3414 if (uSubtrahend == 1)
3415 {
3416 pbCodeBuf[off++] = 0xff; /* dec */
3417 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3418 }
3419 else if (uSubtrahend < 128)
3420 {
3421 pbCodeBuf[off++] = 0x83; /* sub */
3422 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3423 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3424 }
3425 else
3426 {
3427 pbCodeBuf[off++] = 0x81; /* sub */
3428 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3429 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3430 pbCodeBuf[off++] = 0;
3431 pbCodeBuf[off++] = 0;
3432 pbCodeBuf[off++] = 0;
3433 }
3434
3435#else
3436 /* sub gstgrp, gstgrp, uSubtrahend */
3437 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3438 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
3439
3440#endif
3441
3442 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3443
3444#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3445 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3446#endif
3447
3448 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3449 return off;
3450}
3451
3452
3453#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
3454 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
3455
3456#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
3457 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
3458
3459#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
3460 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
3461
3462#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
3463 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
3464
3465/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
3466DECL_INLINE_THROW(uint32_t)
3467iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
3468{
3469#ifdef VBOX_STRICT
3470 switch (cbMask)
3471 {
3472 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3473 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3474 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3475 case sizeof(uint64_t): break;
3476 default: AssertFailedBreak();
3477 }
3478#endif
3479
3480 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3481 kIemNativeGstRegUse_ForUpdate);
3482
3483 switch (cbMask)
3484 {
3485 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
3486 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
3487 break;
3488 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
3489 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
3490 break;
3491 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
3492 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
3493 break;
3494 case sizeof(uint64_t):
3495 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
3496 break;
3497 default: AssertFailedBreak();
3498 }
3499
3500 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3501
3502#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3503 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3504#endif
3505
3506 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3507 return off;
3508}
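/*
 * Note on the mask widening above (values are just examples): for the 8 and
 * 16-bit variants the constant is OR'ed with 0xffffffffffffff00 resp.
 * 0xffffffffffff0000 before the 64-bit AND so only the addressed
 * sub-register is modified, whereas the 32-bit variant deliberately uses a
 * 32-bit AND to match the architectural rule that 32-bit GPR writes clear
 * bits 63:32:
 *
 *      IEM_MC_AND_GREG_U16(X86_GREG_xDX, 0x00ff)
 *          -> and <rdx shadow>, 0xffffffffffff00ff   ; DX &= 0xff, rest kept
 *      IEM_MC_AND_GREG_U32(X86_GREG_xDX, 0x00ff)
 *          -> and <edx shadow>, 0x000000ff           ; bits 63:32 cleared
 */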
3509
3510
3511#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
3512 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
3513
3514#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
3515 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
3516
3517#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
3518 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
3519
3520#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
3521 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
3522
3523/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
3524DECL_INLINE_THROW(uint32_t)
3525iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
3526{
3527#ifdef VBOX_STRICT
3528 switch (cbMask)
3529 {
3530 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3531 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3532 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3533 case sizeof(uint64_t): break;
3534 default: AssertFailedBreak();
3535 }
3536#endif
3537
3538 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3539 kIemNativeGstRegUse_ForUpdate);
3540
3541 switch (cbMask)
3542 {
3543 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
3544 case sizeof(uint16_t):
3545 case sizeof(uint64_t):
3546 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
3547 break;
3548 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
3549 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
3550 break;
3551 default: AssertFailedBreak();
3552 }
3553
3554 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3555
3556#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3557 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3558#endif
3559
3560 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3561 return off;
3562}
3563
3564
3565/*********************************************************************************************************************************
3566* Local/Argument variable manipulation (add, sub, and, or). *
3567*********************************************************************************************************************************/
3568
3569#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
3570 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3571
3572#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
3573 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3574
3575#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
3576 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3577
3578#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
3579 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3580
3581
3582#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
3583 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
3584
3585#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
3586 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
3587
3588#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
3589 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
3590
3591/** Emits code for AND'ing a local and a constant value. */
3592DECL_INLINE_THROW(uint32_t)
3593iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3594{
3595#ifdef VBOX_STRICT
3596 switch (cbMask)
3597 {
3598 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3599 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3600 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3601 case sizeof(uint64_t): break;
3602 default: AssertFailedBreak();
3603 }
3604#endif
3605
3606 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3607 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3608
3609 if (cbMask <= sizeof(uint32_t))
3610 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
3611 else
3612 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
3613
3614 iemNativeVarRegisterRelease(pReNative, idxVar);
3615 return off;
3616}
3617
3618
3619#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
3620 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3621
3622#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
3623 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3624
3625#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
3626 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3627
3628#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
3629 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3630
3631/** Emits code for OR'ing a local and a constant value. */
3632DECL_INLINE_THROW(uint32_t)
3633iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3634{
3635#ifdef VBOX_STRICT
3636 switch (cbMask)
3637 {
3638 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3639 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3640 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3641 case sizeof(uint64_t): break;
3642 default: AssertFailedBreak();
3643 }
3644#endif
3645
3646 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3647 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3648
3649 if (cbMask <= sizeof(uint32_t))
3650 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
3651 else
3652 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
3653
3654 iemNativeVarRegisterRelease(pReNative, idxVar);
3655 return off;
3656}
3657
3658
3659#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
3660 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
3661
3662#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
3663 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
3664
3665#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
3666 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
3667
3668/** Emits code for reversing the byte order in a local value. */
3669DECL_INLINE_THROW(uint32_t)
3670iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
3671{
3672 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3673 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3674
3675 switch (cbLocal)
3676 {
3677 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
3678 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
3679 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
3680 default: AssertFailedBreak();
3681 }
3682
3683 iemNativeVarRegisterRelease(pReNative, idxVar);
3684 return off;
3685}
3686
3687
3688#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
3689 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
3690
3691#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
3692 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
3693
3694#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
3695 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
3696
3697/** Emits code for shifting left a local value. */
3698DECL_INLINE_THROW(uint32_t)
3699iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
3700{
3701#ifdef VBOX_STRICT
3702 switch (cbLocal)
3703 {
3704 case sizeof(uint8_t): Assert(cShift < 8); break;
3705 case sizeof(uint16_t): Assert(cShift < 16); break;
3706 case sizeof(uint32_t): Assert(cShift < 32); break;
3707 case sizeof(uint64_t): Assert(cShift < 64); break;
3708 default: AssertFailedBreak();
3709 }
3710#endif
3711
3712 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3713 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3714
3715 if (cbLocal <= sizeof(uint32_t))
3716 {
3717 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
3718 if (cbLocal < sizeof(uint32_t))
3719 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
3720 cbLocal == sizeof(uint16_t)
3721 ? UINT32_C(0xffff)
3722 : UINT32_C(0xff));
3723 }
3724 else
3725 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
3726
3727 iemNativeVarRegisterRelease(pReNative, idxVar);
3728 return off;
3729}
3730
3731
3732#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
3733 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
3734
3735#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
3736 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
3737
3738#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
3739 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
3740
3741/** Emits code for arithmetically shifting right a local value. */
3742DECL_INLINE_THROW(uint32_t)
3743iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
3744{
3745#ifdef VBOX_STRICT
3746 switch (cbLocal)
3747 {
3748 case sizeof(int8_t): Assert(cShift < 8); break;
3749 case sizeof(int16_t): Assert(cShift < 16); break;
3750 case sizeof(int32_t): Assert(cShift < 32); break;
3751 case sizeof(int64_t): Assert(cShift < 64); break;
3752 default: AssertFailedBreak();
3753 }
3754#endif
3755
3756 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3757 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3758
3759 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
3760 if (cbLocal == sizeof(uint8_t))
3761 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3762 else if (cbLocal == sizeof(uint16_t))
3763 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
3764
3765 if (cbLocal <= sizeof(uint32_t))
3766 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
3767 else
3768 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
3769
3770 iemNativeVarRegisterRelease(pReNative, idxVar);
3771 return off;
3772}
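/*
 * Sketch of why the 8 and 16-bit cases are sign-extended first: the local
 * only guarantees its declared width in the host register, so a plain
 * 32-bit arithmetic shift would use the wrong sign bit.  For a hypothetical
 * IEM_MC_SAR_LOCAL_S16(i16Local, 4) the AMD64 output is roughly:
 *
 *      movsx eax, ax       ; propagate bit 15 into bits 31:16
 *      sar eax, 4          ; now the 32-bit arithmetic shift is correct
 *
 * (eax standing in for whatever host register the variable got.)
 */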
3773
3774
3775#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
3776 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
3777
3778#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
3779 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
3780
3781#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
3782 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
3783
3784/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
3785DECL_INLINE_THROW(uint32_t)
3786iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
3787{
3788 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
3789 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
3790 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3791 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3792
3793 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3794 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
3795
3796 /* Need to sign extend the value. */
3797 if (cbLocal <= sizeof(uint32_t))
3798 {
3799/** @todo ARM64: In case of boredom, the extended add instruction can do the
3800 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
3801 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
3802
3803 switch (cbLocal)
3804 {
3805 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
3806 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
3807 default: AssertFailed();
3808 }
3809
3810 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
3811 iemNativeRegFreeTmp(pReNative, idxRegTmp);
3812 }
3813 else
3814 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
3815
3816 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
3817 iemNativeVarRegisterRelease(pReNative, idxVar);
3818 return off;
3819}
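/*
 * Regarding the @todo above: on ARM64 the sign extension and the addition
 * could in principle be fused with the extended register form of ADD, e.g.
 *
 *      add x0, x0, w1, sxth    ; EffAddr += (int16_t)local
 *      add x0, x0, w1, sxtw    ; EffAddr += (int32_t)local
 *
 * saving the temporary register currently allocated for the sign-extended
 * value (register numbers above are illustrative only).
 */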
3820
3821
3822
3823/*********************************************************************************************************************************
3824* EFLAGS *
3825*********************************************************************************************************************************/
3826
3827#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
3828# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
3829#else
3830# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
3831 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
3832
3833DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
3834{
3835 if (fEflOutput)
3836 {
3837 PVMCPUCC const pVCpu = pReNative->pVCpu;
3838# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3839 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
3840 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
3841 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
3842# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
3843 if (fEflOutput & (a_fEfl)) \
3844 { \
3845 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
3846 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
3847 else \
3848 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
3849 } else do { } while (0)
3850# else
3851 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
3852 IEMLIVENESSBIT const LivenessClobbered =
3853 {
3854 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3855 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3856 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
3857 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
3858 };
3859 IEMLIVENESSBIT const LivenessDelayable =
3860 {
3861 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3862 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
3863 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3864 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
3865 };
3866# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
3867 if (fEflOutput & (a_fEfl)) \
3868 { \
3869 if (LivenessClobbered.a_fLivenessMember) \
3870 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
3871 else if (LivenessDelayable.a_fLivenessMember) \
3872 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
3873 else \
3874 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
3875 } else do { } while (0)
3876# endif
3877 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
3878 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
3879 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
3880 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
3881 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
3882 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
3883 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
3884# undef CHECK_FLAG_AND_UPDATE_STATS
3885 }
3886 RT_NOREF(fEflInput);
3887}
3888#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
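
/*
 * The liveness classification used by the statistics above can be expressed
 * as two bitmask tests (a minimal sketch with illustrative names; the real
 * bitmaps are the aBits[IEMLIVENESS_BIT_*].bm64 members used above):
 *
 *     typedef enum { kEflSkippable, kEflDelayable, kEflRequired } EFLCLASS;
 *
 *     static EFLCLASS ClassifyEflBit(uint64_t bmWrite, uint64_t bmRead,
 *                                    uint64_t bmXcptOrCall, uint64_t bmOther, uint64_t fBit)
 *     {
 *         if (bmWrite & ~(bmRead | bmXcptOrCall | bmOther) & fBit)
 *             return kEflSkippable;   // overwritten before any use - calculating it can be skipped
 *         if (bmWrite & bmXcptOrCall & ~(bmRead | bmOther) & fBit)
 *             return kEflDelayable;   // only a potential exception/helper call may observe it
 *         return kEflRequired;        // a later read (or other use) needs the value
 *     }
 */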
3889
3890#undef IEM_MC_FETCH_EFLAGS /* should not be used */
3891#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
3892 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
3893
3894/** Handles IEM_MC_FETCH_EFLAGS_EX. */
3895DECL_INLINE_THROW(uint32_t)
3896iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
3897 uint32_t fEflInput, uint32_t fEflOutput)
3898{
3899 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
3900 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
3901 RT_NOREF(fEflInput, fEflOutput);
3902
3903#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3904# ifdef VBOX_STRICT
3905 if ( pReNative->idxCurCall != 0
3906 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
3907 {
3908 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
3909 uint32_t const fBoth = fEflInput | fEflOutput;
3910# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
3911 AssertMsg( !(fBoth & (a_fElfConst)) \
3912 || (!(fEflInput & (a_fElfConst)) \
3913 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
3914 : !(fEflOutput & (a_fElfConst)) \
3915 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
3916 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
3917 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
3918 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
3919 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
3920 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
3921 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
3922 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
3923 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
3924 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
3925# undef ASSERT_ONE_EFL
3926 }
3927# endif
3928#endif
3929
3930 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
3931
3932    /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
3933 * the existing shadow copy. */
3934 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
3935 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
3936 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
3937 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
3938 return off;
3939}
3940
3941
3942
3943/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
3944 * start using it with custom native code emission (inlining assembly
3945 * instruction helpers). */
3946#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
3947#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
3948 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
3949 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
3950
3951#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
3952#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
3953 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
3954 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
3955
3956/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
3957DECL_INLINE_THROW(uint32_t)
3958iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
3959 bool fUpdateSkipping)
3960{
3961 RT_NOREF(fEflOutput);
3962 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
3963 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
3964
3965#ifdef VBOX_STRICT
3966 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
3967 uint32_t offFixup = off;
3968 off = iemNativeEmitJnzToFixed(pReNative, off, off);
3969 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
3970 iemNativeFixupFixedJump(pReNative, offFixup, off);
3971
3972 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
3973 offFixup = off;
3974 off = iemNativeEmitJzToFixed(pReNative, off, off);
3975 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
3976 iemNativeFixupFixedJump(pReNative, offFixup, off);
3977
3978    /** @todo validate that only bits in the fEflOutput mask changed. */
3979#endif
3980
3981#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
3982 if (fUpdateSkipping)
3983 {
3984 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
3985 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
3986 else
3987 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
3988 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
3989 }
3990#else
3991 RT_NOREF_PV(fUpdateSkipping);
3992#endif
3993
3994 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
3995 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
3996 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
3997 return off;
3998}
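
/*
 * The strict-build check above asserts the same invariant this little
 * sketch expresses in plain C (using the mask constants referenced above;
 * the helper name is illustrative only): bit 1 of EFLAGS always reads as
 * one and the reserved bits always read as zero.
 *
 *     static bool IsEFlagsValueSane(uint32_t fEfl)
 *     {
 *         if (!(fEfl & X86_EFL_RA1_MASK))                            // reserved-as-1 bit must be set
 *             return false;
 *         if (fEfl & (X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32))  // reserved-as-0 bits must be clear
 *             return false;
 *         return true;
 *     }
 */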
3999
4000
4001
4002/*********************************************************************************************************************************
4003* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
4004*********************************************************************************************************************************/
4005
4006#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
4007 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
4008
4009#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
4010 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
4011
4012#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
4013 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
4014
4015
4016/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
4017 * IEM_MC_FETCH_SREG_ZX_U64. */
4018DECL_INLINE_THROW(uint32_t)
4019iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
4020{
4021 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4022 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
4023 Assert(iSReg < X86_SREG_COUNT);
4024
4025 /*
4026     * For now, we will not create a shadow copy of a selector. The rationale
4027 * is that since we do not recompile the popping and loading of segment
4028     * registers, and since the IEM_MC_FETCH_SREG_U* MCs are only used for
4029 * pushing and moving to registers, there is only a small chance that the
4030 * shadow copy will be accessed again before the register is reloaded. One
4031     * scenario would be nested calls in 16-bit code, but I doubt it's worth
4032 * the extra register pressure atm.
4033 *
4034 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
4035     * and iemNativeVarRegisterAcquire for a load scenario. We only have the
4036     * store scenario covered at present (r160730).
4037 */
4038 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4039 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4040 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
4041 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4042 return off;
4043}
4044
4045
4046
4047/*********************************************************************************************************************************
4048* Register references. *
4049*********************************************************************************************************************************/
4050
4051#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
4052 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
4053
4054#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
4055 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
4056
4057/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
4058DECL_INLINE_THROW(uint32_t)
4059iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
4060{
4061 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
4062 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4063 Assert(iGRegEx < 20);
4064
4065 if (iGRegEx < 16)
4066 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
4067 else
4068 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
4069
4070 /* If we've delayed writing back the register value, flush it now. */
4071 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
4072
4073 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4074 if (!fConst)
4075 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
4076
4077 return off;
4078}
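
/*
 * The extended 8-bit register index used above encodes the four legacy
 * high-byte registers (AH/CH/DH/BH) as 16..19.  A minimal sketch of the
 * addressing this implies (illustrative helper, not a recompiler API):
 *
 *     static uint8_t FetchGReg8Ex(uint64_t const *pauGRegs, uint8_t iGRegEx)
 *     {
 *         if (iGRegEx < 16)
 *             return (uint8_t)pauGRegs[iGRegEx];          // AL..R15B: bits 7:0
 *         return (uint8_t)(pauGRegs[iGRegEx & 15] >> 8);  // AH/CH/DH/BH: bits 15:8 of xAX..xBX
 *     }
 */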
4079
4080#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
4081 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
4082
4083#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
4084 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
4085
4086#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
4087 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
4088
4089#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
4090 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
4091
4092#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
4093 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
4094
4095#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
4096 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
4097
4098#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
4099 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
4100
4101#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
4102 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
4103
4104#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
4105 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
4106
4107#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
4108 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
4109
4110/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
4111DECL_INLINE_THROW(uint32_t)
4112iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
4113{
4114 Assert(iGReg < 16);
4115 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
4116 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4117
4118 /* If we've delayed writing back the register value, flush it now. */
4119 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
4120
4121 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4122 if (!fConst)
4123 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
4124
4125 return off;
4126}
4127
4128
4129#undef IEM_MC_REF_EFLAGS /* should not be used. */
4130#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
4131 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
4132 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
4133
4134/** Handles IEM_MC_REF_EFLAGS. */
4135DECL_INLINE_THROW(uint32_t)
4136iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
4137{
4138 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
4139 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4140
4141#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
4142 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
4143
4144 /* Updating the skipping according to the outputs is a little early, but
4145 we don't have any other hooks for references atm. */
4146 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
4147 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4148 else if (fEflOutput & X86_EFL_STATUS_BITS)
4149 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
4150 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4151#else
4152 RT_NOREF(fEflInput, fEflOutput);
4153#endif
4154
4155 /* If we've delayed writing back the register value, flush it now. */
4156 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
4157
4158 /* If there is a shadow copy of guest EFLAGS, flush it now. */
4159 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
4160
4161 return off;
4162}
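
/*
 * The fSkippingEFlags bookkeeping above follows a simple rule, spelled out
 * here in plain C (the helper name is illustrative only): once a client
 * produces all status flags for real, nothing remains skipped; otherwise
 * only the produced bits are removed from the skipping mask.
 *
 *     static uint32_t UpdateSkippingMask(uint32_t fSkippingEFlags, uint32_t fEflOutput)
 *     {
 *         if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
 *             return 0;                                                  // all status flags now computed
 *         return fSkippingEFlags & ~(fEflOutput & X86_EFL_STATUS_BITS);  // these bits no longer skipped
 *     }
 */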
4163
4164
4165/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
4166 * different code from the threaded recompiler, it may become helpful. For now
4167 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
4168#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
4169
4170
4171#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
4172 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
4173
4174#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
4175 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
4176
4177#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
4178 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
4179
4180#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4181/* Just being paranoid here. */
4182# ifndef _MSC_VER /* MSC can't compile this, it doesn't like the [0]; a reduced version follows below. */
4183AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
4184AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
4185AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
4186AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
4187# endif
4188AssertCompileMemberOffset(X86XMMREG, au64, 0);
4189AssertCompileMemberOffset(X86XMMREG, au32, 0);
4190AssertCompileMemberOffset(X86XMMREG, ar64, 0);
4191AssertCompileMemberOffset(X86XMMREG, ar32, 0);
4192
4193# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
4194 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
4195# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
4196 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
4197# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
4198 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
4199# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
4200 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
4201#endif
4202
4203/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
4204DECL_INLINE_THROW(uint32_t)
4205iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
4206{
4207 Assert(iXReg < 16);
4208 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
4209 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4210
4211 /* If we've delayed writing back the register value, flush it now. */
4212 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
4213
4214#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4215 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4216 if (!fConst)
4217 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
4218#else
4219 RT_NOREF(fConst);
4220#endif
4221
4222 return off;
4223}
4224
4225
4226#define IEM_MC_REF_MXCSR(a_pfMxcsr) \
4227 off = iemNativeEmitRefMxcsr(pReNative, off, a_pfMxcsr)
4228
4229/** Handles IEM_MC_REF_MXCSR. */
4230DECL_INLINE_THROW(uint32_t)
4231iemNativeEmitRefMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
4232{
4233 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_MxCsr, 0);
4234 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4235
4236 /* If we've delayed writing back the register value, flush it now. */
4237 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_MxCsr, 0);
4238
4239 /* If there is a shadow copy of guest MXCSR, flush it now. */
4240 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_MxCsr));
4241
4242 return off;
4243}
4244
4245
4246
4247/*********************************************************************************************************************************
4248* Effective Address Calculation *
4249*********************************************************************************************************************************/
4250#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
4251 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
4252
4253/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
4254 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
4255DECL_INLINE_THROW(uint32_t)
4256iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4257 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
4258{
4259 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4260
4261 /*
4262 * Handle the disp16 form with no registers first.
4263 *
4264 * Convert to an immediate value, as that'll delay the register allocation
4265 * and assignment till the memory access / call / whatever and we can use
4266 * a more appropriate register (or none at all).
4267 */
4268 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
4269 {
4270 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
4271 return off;
4272 }
4273
4274    /* Determine the displacement. */
4275 uint16_t u16EffAddr;
4276 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4277 {
4278 case 0: u16EffAddr = 0; break;
4279 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
4280 case 2: u16EffAddr = u16Disp; break;
4281 default: AssertFailedStmt(u16EffAddr = 0);
4282 }
4283
4284 /* Determine the registers involved. */
4285 uint8_t idxGstRegBase;
4286 uint8_t idxGstRegIndex;
4287 switch (bRm & X86_MODRM_RM_MASK)
4288 {
4289 case 0:
4290 idxGstRegBase = X86_GREG_xBX;
4291 idxGstRegIndex = X86_GREG_xSI;
4292 break;
4293 case 1:
4294 idxGstRegBase = X86_GREG_xBX;
4295 idxGstRegIndex = X86_GREG_xDI;
4296 break;
4297 case 2:
4298 idxGstRegBase = X86_GREG_xBP;
4299 idxGstRegIndex = X86_GREG_xSI;
4300 break;
4301 case 3:
4302 idxGstRegBase = X86_GREG_xBP;
4303 idxGstRegIndex = X86_GREG_xDI;
4304 break;
4305 case 4:
4306 idxGstRegBase = X86_GREG_xSI;
4307 idxGstRegIndex = UINT8_MAX;
4308 break;
4309 case 5:
4310 idxGstRegBase = X86_GREG_xDI;
4311 idxGstRegIndex = UINT8_MAX;
4312 break;
4313 case 6:
4314 idxGstRegBase = X86_GREG_xBP;
4315 idxGstRegIndex = UINT8_MAX;
4316 break;
4317#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
4318 default:
4319#endif
4320 case 7:
4321 idxGstRegBase = X86_GREG_xBX;
4322 idxGstRegIndex = UINT8_MAX;
4323 break;
4324 }
4325
4326 /*
4327 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
4328 */
4329 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4330 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4331 kIemNativeGstRegUse_ReadOnly);
4332 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
4333 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4334 kIemNativeGstRegUse_ReadOnly)
4335 : UINT8_MAX;
4336#ifdef RT_ARCH_AMD64
4337 if (idxRegIndex == UINT8_MAX)
4338 {
4339 if (u16EffAddr == 0)
4340 {
4341 /* movxz ret, base */
4342 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
4343 }
4344 else
4345 {
4346 /* lea ret32, [base64 + disp32] */
4347 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4348 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4349 if (idxRegRet >= 8 || idxRegBase >= 8)
4350 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
4351 pbCodeBuf[off++] = 0x8d;
4352 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4353 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
4354 else
4355 {
4356 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
4357 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4358 }
4359 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
4360 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
4361 pbCodeBuf[off++] = 0;
4362 pbCodeBuf[off++] = 0;
4363 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4364
4365 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
4366 }
4367 }
4368 else
4369 {
4370 /* lea ret32, [index64 + base64 (+ disp32)] */
4371 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4372 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4373 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4374 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4375 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4376 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4377 pbCodeBuf[off++] = 0x8d;
4378 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
4379 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4380 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
4381 if (bMod == X86_MOD_MEM4)
4382 {
4383 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
4384 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
4385 pbCodeBuf[off++] = 0;
4386 pbCodeBuf[off++] = 0;
4387 }
4388 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4389 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
4390 }
4391
4392#elif defined(RT_ARCH_ARM64)
4393 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4394 if (u16EffAddr == 0)
4395 {
4396 if (idxRegIndex == UINT8_MAX)
4397 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
4398 else
4399 {
4400 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
4401 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
4402 }
4403 }
4404 else
4405 {
4406 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
4407 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
4408 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
4409 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
4410 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
4411 else
4412 {
4413 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
4414 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
4415 }
4416 if (idxRegIndex != UINT8_MAX)
4417 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
4418 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
4419 }
4420
4421#else
4422# error "port me"
4423#endif
4424
4425 if (idxRegIndex != UINT8_MAX)
4426 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4427 iemNativeRegFreeTmp(pReNative, idxRegBase);
4428 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4429 return off;
4430}
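
/*
 * What the emitted code computes is ordinary 16-bit effective addressing:
 * the base/index pair selected from the ModRM table above plus the
 * displacement, wrapping at 64K.  A minimal sketch with illustrative
 * parameter names (the register values are the ones selected above):
 *
 *     static uint16_t CalcEffAddr16(uint16_t uBase, uint16_t uIndex, uint8_t bMod, uint16_t u16Disp)
 *     {
 *         uint16_t const uDisp = bMod == 0 ? (uint16_t)0
 *                              : bMod == 1 ? (uint16_t)(int16_t)(int8_t)u16Disp  // disp8, sign-extended
 *                              :             u16Disp;                            // disp16
 *         return (uint16_t)(uBase + uIndex + uDisp);                             // wraps at 16 bits
 *     }
 */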
4431
4432
4433#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
4434 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
4435
4436/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
4437 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
4438DECL_INLINE_THROW(uint32_t)
4439iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4440 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
4441{
4442 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4443
4444 /*
4445 * Handle the disp32 form with no registers first.
4446 *
4447 * Convert to an immediate value, as that'll delay the register allocation
4448 * and assignment till the memory access / call / whatever and we can use
4449 * a more appropriate register (or none at all).
4450 */
4451 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4452 {
4453 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
4454 return off;
4455 }
4456
4457    /* Calculate the fixed displacement (more on this below for SIB.B=4 and SIB.B=5). */
4458 uint32_t u32EffAddr = 0;
4459 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4460 {
4461 case 0: break;
4462 case 1: u32EffAddr = (int8_t)u32Disp; break;
4463 case 2: u32EffAddr = u32Disp; break;
4464 default: AssertFailed();
4465 }
4466
4467 /* Get the register (or SIB) value. */
4468 uint8_t idxGstRegBase = UINT8_MAX;
4469 uint8_t idxGstRegIndex = UINT8_MAX;
4470 uint8_t cShiftIndex = 0;
4471 switch (bRm & X86_MODRM_RM_MASK)
4472 {
4473 case 0: idxGstRegBase = X86_GREG_xAX; break;
4474 case 1: idxGstRegBase = X86_GREG_xCX; break;
4475 case 2: idxGstRegBase = X86_GREG_xDX; break;
4476 case 3: idxGstRegBase = X86_GREG_xBX; break;
4477 case 4: /* SIB */
4478 {
4479            /* index with scaling. */
4480 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4481 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4482 {
4483 case 0: idxGstRegIndex = X86_GREG_xAX; break;
4484 case 1: idxGstRegIndex = X86_GREG_xCX; break;
4485 case 2: idxGstRegIndex = X86_GREG_xDX; break;
4486 case 3: idxGstRegIndex = X86_GREG_xBX; break;
4487 case 4: cShiftIndex = 0; /*no index*/ break;
4488 case 5: idxGstRegIndex = X86_GREG_xBP; break;
4489 case 6: idxGstRegIndex = X86_GREG_xSI; break;
4490 case 7: idxGstRegIndex = X86_GREG_xDI; break;
4491 }
4492
4493 /* base */
4494 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
4495 {
4496 case 0: idxGstRegBase = X86_GREG_xAX; break;
4497 case 1: idxGstRegBase = X86_GREG_xCX; break;
4498 case 2: idxGstRegBase = X86_GREG_xDX; break;
4499 case 3: idxGstRegBase = X86_GREG_xBX; break;
4500 case 4:
4501 idxGstRegBase = X86_GREG_xSP;
4502 u32EffAddr += uSibAndRspOffset >> 8;
4503 break;
4504 case 5:
4505 if ((bRm & X86_MODRM_MOD_MASK) != 0)
4506 idxGstRegBase = X86_GREG_xBP;
4507 else
4508 {
4509 Assert(u32EffAddr == 0);
4510 u32EffAddr = u32Disp;
4511 }
4512 break;
4513 case 6: idxGstRegBase = X86_GREG_xSI; break;
4514 case 7: idxGstRegBase = X86_GREG_xDI; break;
4515 }
4516 break;
4517 }
4518 case 5: idxGstRegBase = X86_GREG_xBP; break;
4519 case 6: idxGstRegBase = X86_GREG_xSI; break;
4520 case 7: idxGstRegBase = X86_GREG_xDI; break;
4521 }
4522
4523 /*
4524 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
4525 * the start of the function.
4526 */
4527 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4528 {
4529 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
4530 return off;
4531 }
4532
4533 /*
4534 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4535 */
4536 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4537 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
4538 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4539 kIemNativeGstRegUse_ReadOnly);
4540 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
4541 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4542 kIemNativeGstRegUse_ReadOnly);
4543
4544 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
4545 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
4546 {
4547 idxRegBase = idxRegIndex;
4548 idxRegIndex = UINT8_MAX;
4549 }
4550
4551#ifdef RT_ARCH_AMD64
4552 if (idxRegIndex == UINT8_MAX)
4553 {
4554 if (u32EffAddr == 0)
4555 {
4556 /* mov ret, base */
4557 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4558 }
4559 else
4560 {
4561 /* lea ret32, [base64 + disp32] */
4562 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4563 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4564 if (idxRegRet >= 8 || idxRegBase >= 8)
4565 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
4566 pbCodeBuf[off++] = 0x8d;
4567 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4568 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4569 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
4570 else
4571 {
4572 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4573 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4574 }
4575 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4576 if (bMod == X86_MOD_MEM4)
4577 {
4578 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4579 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4580 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4581 }
4582 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4583 }
4584 }
4585 else
4586 {
4587 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4588 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4589 if (idxRegBase == UINT8_MAX)
4590 {
4591 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
4592 if (idxRegRet >= 8 || idxRegIndex >= 8)
4593 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4594 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4595 pbCodeBuf[off++] = 0x8d;
4596 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
4597 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
4598 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4599 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4600 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4601 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4602 }
4603 else
4604 {
4605 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
4606 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4607 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4608 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4609 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4610 pbCodeBuf[off++] = 0x8d;
4611 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
4612 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4613 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4614 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
4615 if (bMod != X86_MOD_MEM0)
4616 {
4617 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4618 if (bMod == X86_MOD_MEM4)
4619 {
4620 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4621 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4622 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4623 }
4624 }
4625 }
4626 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4627 }
4628
4629#elif defined(RT_ARCH_ARM64)
4630 if (u32EffAddr == 0)
4631 {
4632 if (idxRegIndex == UINT8_MAX)
4633 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4634 else if (idxRegBase == UINT8_MAX)
4635 {
4636 if (cShiftIndex == 0)
4637 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
4638 else
4639 {
4640 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4641 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
4642 }
4643 }
4644 else
4645 {
4646 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4647 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
4648 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4649 }
4650 }
4651 else
4652 {
4653 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
4654 {
4655 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4656 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
4657 }
4658 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
4659 {
4660 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4661 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
4662 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
4663 }
4664 else
4665 {
4666 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
4667 if (idxRegBase != UINT8_MAX)
4668 {
4669 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4670 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
4671 }
4672 }
4673 if (idxRegIndex != UINT8_MAX)
4674 {
4675 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4676 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
4677 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4678 }
4679 }
4680
4681#else
4682# error "port me"
4683#endif
4684
4685 if (idxRegIndex != UINT8_MAX)
4686 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4687 if (idxRegBase != UINT8_MAX)
4688 iemNativeRegFreeTmp(pReNative, idxRegBase);
4689 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4690 return off;
4691}
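
/*
 * Stripped of the special cases handled above (no base with SIB.base=5 and
 * mod=0, no index with SIB.index=4, and the pop [esp] RSP offset), the
 * emitted code computes standard 32-bit SIB addressing.  A minimal sketch
 * with illustrative names:
 *
 *     static uint32_t CalcEffAddr32(uint32_t uBase, uint32_t uIndex, uint8_t cShift, uint32_t u32Disp)
 *     {
 *         return uBase + (uIndex << cShift) + u32Disp;   // result truncated to 32 bits
 *     }
 */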
4692
4693
4694#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4695 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4696 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4697
4698#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4699 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4700 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4701
4702#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4703 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4704 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
4705
4706/**
4707 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
4708 *
4709 * @returns New code buffer offset.
4710 * @param pReNative        The native recompile state.
4711 * @param off              The current code buffer offset.
4712 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
4713 * bit 4 to REX.X. The two bits are part of the
4714 * REG sub-field, which isn't needed in this
4715 * function.
4716 * @param uSibAndRspOffset Two parts:
4717 * - The first 8 bits make up the SIB byte.
4718 * - The next 8 bits are the fixed RSP/ESP offset
4719 * in case of a pop [xSP].
4720 * @param u32Disp The displacement byte/word/dword, if any.
4721 * @param cbInstr The size of the fully decoded instruction. Used
4722 * for RIP relative addressing.
4723 * @param idxVarRet The result variable number.
4724 * @param f64Bit Whether to use a 64-bit or 32-bit address size
4725 * when calculating the address.
4726 *
4727 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
4728 */
4729DECL_INLINE_THROW(uint32_t)
4730iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
4731 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
4732{
4733 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4734
4735 /*
4736 * Special case the rip + disp32 form first.
4737 */
4738 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4739 {
4740#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
4741        /* Need to take the current PC offset into account for the displacement; no need to flush here
4742         * as the PC is only read and no branching or helper calls are involved. */
4743 u32Disp += pReNative->Core.offPc;
4744#endif
4745
4746 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4747 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
4748 kIemNativeGstRegUse_ReadOnly);
4749#ifdef RT_ARCH_AMD64
4750 if (f64Bit)
4751 {
4752 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
4753 if ((int32_t)offFinalDisp == offFinalDisp)
4754 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
4755 else
4756 {
4757 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
4758 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
4759 }
4760 }
4761 else
4762 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
4763
4764#elif defined(RT_ARCH_ARM64)
4765 if (f64Bit)
4766 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4767 (int64_t)(int32_t)u32Disp + cbInstr);
4768 else
4769 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4770 (int32_t)u32Disp + cbInstr);
4771
4772#else
4773# error "Port me!"
4774#endif
4775 iemNativeRegFreeTmp(pReNative, idxRegPc);
4776 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4777 return off;
4778 }
4779
4780    /* Calculate the fixed displacement (more on this below for SIB.B=4 and SIB.B=5). */
4781 int64_t i64EffAddr = 0;
4782 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4783 {
4784 case 0: break;
4785 case 1: i64EffAddr = (int8_t)u32Disp; break;
4786 case 2: i64EffAddr = (int32_t)u32Disp; break;
4787 default: AssertFailed();
4788 }
4789
4790 /* Get the register (or SIB) value. */
4791 uint8_t idxGstRegBase = UINT8_MAX;
4792 uint8_t idxGstRegIndex = UINT8_MAX;
4793 uint8_t cShiftIndex = 0;
4794 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
4795 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
4796 else /* SIB: */
4797 {
4798        /* index with scaling. */
4799 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4800 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4801 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
4802 if (idxGstRegIndex == 4)
4803 {
4804 /* no index */
4805 cShiftIndex = 0;
4806 idxGstRegIndex = UINT8_MAX;
4807 }
4808
4809 /* base */
4810 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
4811 if (idxGstRegBase == 4)
4812 {
4813 /* pop [rsp] hack */
4814 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
4815 }
4816 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
4817 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
4818 {
4819 /* mod=0 and base=5 -> disp32, no base reg. */
4820 Assert(i64EffAddr == 0);
4821 i64EffAddr = (int32_t)u32Disp;
4822 idxGstRegBase = UINT8_MAX;
4823 }
4824 }
4825
4826 /*
4827 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
4828 * the start of the function.
4829 */
4830 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4831 {
4832 if (f64Bit)
4833 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
4834 else
4835 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
4836 return off;
4837 }
4838
4839 /*
4840 * Now emit code that calculates:
4841 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4842 * or if !f64Bit:
4843 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4844 */
4845 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4846 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
4847 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4848 kIemNativeGstRegUse_ReadOnly);
4849 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
4850 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4851 kIemNativeGstRegUse_ReadOnly);
4852
4853 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
4854 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
4855 {
4856 idxRegBase = idxRegIndex;
4857 idxRegIndex = UINT8_MAX;
4858 }
4859
4860#ifdef RT_ARCH_AMD64
4861 uint8_t bFinalAdj;
4862 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
4863 bFinalAdj = 0; /* likely */
4864 else
4865 {
4866 /* pop [rsp] with a problematic disp32 value. Split out the
4867 RSP offset and add it separately afterwards (bFinalAdj). */
4868 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
4869 Assert(idxGstRegBase == X86_GREG_xSP);
4870 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
4871 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
4872 Assert(bFinalAdj != 0);
4873 i64EffAddr -= bFinalAdj;
4874 Assert((int32_t)i64EffAddr == i64EffAddr);
4875 }
4876 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
4877//pReNative->pInstrBuf[off++] = 0xcc;
4878
4879 if (idxRegIndex == UINT8_MAX)
4880 {
4881 if (u32EffAddr == 0)
4882 {
4883 /* mov ret, base */
4884 if (f64Bit)
4885 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
4886 else
4887 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4888 }
4889 else
4890 {
4891 /* lea ret, [base + disp32] */
4892 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4893 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4894 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
4895 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4896 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4897 | (f64Bit ? X86_OP_REX_W : 0);
4898 pbCodeBuf[off++] = 0x8d;
4899 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4900 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4901 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
4902 else
4903 {
4904 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4905 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4906 }
4907 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4908 if (bMod == X86_MOD_MEM4)
4909 {
4910 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4911 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4912 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4913 }
4914 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4915 }
4916 }
4917 else
4918 {
4919 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4920 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4921 if (idxRegBase == UINT8_MAX)
4922 {
4923 /* lea ret, [(index64 << cShiftIndex) + disp32] */
4924 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
4925 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4926 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
4927 | (f64Bit ? X86_OP_REX_W : 0);
4928 pbCodeBuf[off++] = 0x8d;
4929 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
4930 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
4931 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4932 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4933 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4934 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4935 }
4936 else
4937 {
4938 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
4939 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4940 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4941 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4942 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
4943 | (f64Bit ? X86_OP_REX_W : 0);
4944 pbCodeBuf[off++] = 0x8d;
4945 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
4946 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4947 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4948 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
4949 if (bMod != X86_MOD_MEM0)
4950 {
4951 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4952 if (bMod == X86_MOD_MEM4)
4953 {
4954 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4955 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4956 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4957 }
4958 }
4959 }
4960 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4961 }
4962
4963 if (!bFinalAdj)
4964 { /* likely */ }
4965 else
4966 {
4967 Assert(f64Bit);
4968 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
4969 }
4970
4971#elif defined(RT_ARCH_ARM64)
4972 if (i64EffAddr == 0)
4973 {
4974 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4975 if (idxRegIndex == UINT8_MAX)
4976 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
4977 else if (idxRegBase != UINT8_MAX)
4978 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
4979 f64Bit, false /*fSetFlags*/, cShiftIndex);
4980 else
4981 {
4982 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
4983 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
4984 }
4985 }
4986 else
4987 {
4988 if (f64Bit)
4989 { /* likely */ }
4990 else
4991 i64EffAddr = (int32_t)i64EffAddr;
4992
4993 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
4994 {
4995 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4996 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
4997 }
4998 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
4999 {
5000 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5001 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
5002 }
5003 else
5004 {
5005 if (f64Bit)
5006 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
5007 else
5008 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
5009 if (idxRegBase != UINT8_MAX)
5010 {
5011 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5012 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
5013 }
5014 }
5015 if (idxRegIndex != UINT8_MAX)
5016 {
5017 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5018 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
5019 f64Bit, false /*fSetFlags*/, cShiftIndex);
5020 }
5021 }
5022
5023#else
5024# error "port me"
5025#endif
5026
5027 if (idxRegIndex != UINT8_MAX)
5028 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5029 if (idxRegBase != UINT8_MAX)
5030 iemNativeRegFreeTmp(pReNative, idxRegBase);
5031 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5032 return off;
5033}
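
/*
 * The RIP-relative special case at the top of the function above resolves
 * to the address of the *next* instruction plus the signed 32-bit
 * displacement.  A minimal sketch with illustrative names:
 *
 *     static uint64_t CalcRipRelAddr(uint64_t uRipInstr, uint8_t cbInstr, int32_t i32Disp)
 *     {
 *         return uRipInstr + cbInstr + (int64_t)i32Disp;   // RIP of the next instruction + disp32
 *     }
 *
 * The remaining forms follow the 32-bit SIB scheme sketched earlier, only
 * with 64-bit registers and an optional truncation to 32 bits when the
 * address-size override is in effect (!f64Bit).
 */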
5034
5035
5036/*********************************************************************************************************************************
5037* Memory fetches and stores common *
5038*********************************************************************************************************************************/
5039
5040typedef enum IEMNATIVEMITMEMOP
5041{
5042 kIemNativeEmitMemOp_Store = 0,
5043 kIemNativeEmitMemOp_Fetch,
5044 kIemNativeEmitMemOp_Fetch_Zx_U16,
5045 kIemNativeEmitMemOp_Fetch_Zx_U32,
5046 kIemNativeEmitMemOp_Fetch_Zx_U64,
5047 kIemNativeEmitMemOp_Fetch_Sx_U16,
5048 kIemNativeEmitMemOp_Fetch_Sx_U32,
5049 kIemNativeEmitMemOp_Fetch_Sx_U64
5050} IEMNATIVEMITMEMOP;
5051
5052/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
5053 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
5054 * (with iSegReg = UINT8_MAX). */
5055DECL_INLINE_THROW(uint32_t)
5056iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
5057 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
5058 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
5059{
5060 /*
5061 * Assert sanity.
5062 */
5063 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
5064 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
5065 Assert( enmOp != kIemNativeEmitMemOp_Store
5066 || pVarValue->enmKind == kIemNativeVarKind_Immediate
5067 || pVarValue->enmKind == kIemNativeVarKind_Stack);
5068 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
5069 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
5070 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
5071 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
5072 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5073 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
5074#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5075 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
5076 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
5077#else
5078 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
5079#endif
5080 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
5081#ifdef VBOX_STRICT
5082 if (iSegReg == UINT8_MAX)
5083 {
5084 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
5085 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
5086 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
5087 switch (cbMem)
5088 {
5089 case 1:
5090 Assert( pfnFunction
5091 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
5092 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5093 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5094 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5095 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5096 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
5097 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
5098 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
5099 : UINT64_C(0xc000b000a0009000) ));
5100 break;
5101 case 2:
5102 Assert( pfnFunction
5103 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
5104 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5105 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5106 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5107 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
5108 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
5109 : UINT64_C(0xc000b000a0009000) ));
5110 break;
5111 case 4:
5112 Assert( pfnFunction
5113 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
5114 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
5115 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
5116 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
5117 : UINT64_C(0xc000b000a0009000) ));
5118 break;
5119 case 8:
5120 Assert( pfnFunction
5121 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
5122 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
5123 : UINT64_C(0xc000b000a0009000) ));
5124 break;
5125#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5126 case sizeof(RTUINT128U):
5127 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5128 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
5129 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
5130 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
5131 || ( enmOp == kIemNativeEmitMemOp_Store
5132 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
5133 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
5134 break;
5135 case sizeof(RTUINT256U):
5136 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5137 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
5138 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
5139 || ( enmOp == kIemNativeEmitMemOp_Store
5140 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
5141 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
5142 break;
5143#endif
5144 }
5145 }
5146 else
5147 {
5148 Assert(iSegReg < 6);
5149 switch (cbMem)
5150 {
5151 case 1:
5152 Assert( pfnFunction
5153 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
5154 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
5155 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5156 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5157 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5158 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
5159 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
5160 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
5161 : UINT64_C(0xc000b000a0009000) ));
5162 break;
5163 case 2:
5164 Assert( pfnFunction
5165 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
5166 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
5167 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
5168 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
5169 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
5170 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
5171 : UINT64_C(0xc000b000a0009000) ));
5172 break;
5173 case 4:
5174 Assert( pfnFunction
5175 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
5176 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
5177 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
5178 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
5179 : UINT64_C(0xc000b000a0009000) ));
5180 break;
5181 case 8:
5182 Assert( pfnFunction
5183 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
5184 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
5185 : UINT64_C(0xc000b000a0009000) ));
5186 break;
5187#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5188 case sizeof(RTUINT128U):
5189 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5190 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
5191 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
5192 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
5193 || ( enmOp == kIemNativeEmitMemOp_Store
5194 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
5195 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
5196 break;
5197 case sizeof(RTUINT256U):
5198 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5199 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
5200 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
5201 || ( enmOp == kIemNativeEmitMemOp_Store
5202 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
5203 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
5204 break;
5205#endif
5206 }
5207 }
5208#endif
5209
5210#ifdef VBOX_STRICT
5211 /*
5212 * Check that the fExec flags we've got make sense.
5213 */
5214 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
5215#endif
5216
5217 /*
5218 * To keep things simple we have to commit any pending writes first as we
5219 * may end up making calls.
5220 */
5221 /** @todo we could postpone this till we make the call and reload the
5222 * registers after returning from the call. Not sure if that's sensible or
5223 * not, though. */
5224#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5225 off = iemNativeRegFlushPendingWrites(pReNative, off);
5226#else
5227 /* The program counter is treated differently for now. */
5228 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
5229#endif
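    /* Note: with delayed PC updating the RIP is deliberately kept out of the flush above;
       the TlbMiss code below writes the up-to-date RIP before the helper call and restores
       the original value afterwards, so the TLB-hit path can keep postponing the update. */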
5230
5231#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5232 /*
5233 * Move/spill/flush stuff out of call-volatile registers.
5234     * This is the easy way out; we could instead restrict this to the TLB-miss branch
5235     * by saving and restoring the active registers around the helper call there.
5236 */
5237 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
5238#endif
5239
5240 /*
5241 * Define labels and allocate the result register (trying for the return
5242 * register if we can).
5243 */
5244 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
5245#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5246 uint8_t idxRegValueFetch = UINT8_MAX;
5247
5248 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5249 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5250 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
5251 else
5252 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5253 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
5254 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
5255 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
5256#else
5257 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5258 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
5259 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
5260 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
5261#endif
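    /* For GPR-sized fetches the value variable is preferably given the call-return register,
       so on the TLB-miss path the helper's return value already lands in the right register
       and the move at the end of the miss path can be skipped. */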
5262 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
5263
5264#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5265 uint8_t idxRegValueStore = UINT8_MAX;
5266
5267 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5268 idxRegValueStore = !TlbState.fSkip
5269 && enmOp == kIemNativeEmitMemOp_Store
5270 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5271 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5272 : UINT8_MAX;
5273 else
5274 idxRegValueStore = !TlbState.fSkip
5275 && enmOp == kIemNativeEmitMemOp_Store
5276 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5277 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5278 : UINT8_MAX;
5279
5280#else
5281 uint8_t const idxRegValueStore = !TlbState.fSkip
5282 && enmOp == kIemNativeEmitMemOp_Store
5283 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5284 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5285 : UINT8_MAX;
5286#endif
5287 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
5288 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
5289 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
5290 : UINT32_MAX;
5291
5292 /*
5293 * Jump to the TLB lookup code.
5294 */
5295 if (!TlbState.fSkip)
5296 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
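    /* Resulting code layout: the TlbMiss code follows inline and ends with a jump to TlbDone;
       the TlbLookup code is emitted after that and, on a hit, falls into the actual load/store
       code with idxRegMemResult holding the host address of the guest memory, while lookup
       misses branch back up to TlbMiss. */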
5297
5298 /*
5299 * TlbMiss:
5300 *
5301 * Call helper to do the fetching.
5302 * We flush all guest register shadow copies here.
5303 */
5304 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
5305
5306#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5307 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5308#else
5309 RT_NOREF(idxInstr);
5310#endif
5311
5312#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5313 if (pReNative->Core.offPc)
5314 {
5315 /*
5316 * Update the program counter but restore it at the end of the TlbMiss branch.
5317 * This should allow delaying more program counter updates for the TlbLookup and hit paths
5318         * which are hopefully much more frequent, reducing the number of memory accesses.
5319 */
5320 /* Allocate a temporary PC register. */
5321 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5322
5323 /* Perform the addition and store the result. */
5324 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5325 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5326
5327 /* Free and flush the PC register. */
5328 iemNativeRegFreeTmp(pReNative, idxPcReg);
5329 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
5330 }
5331#endif
5332
5333#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5334 /* Save variables in volatile registers. */
5335 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
5336 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
5337 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
5338 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
5339#endif
5340
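    /* Helper calling convention used below: ARG0 = pVCpu, ARG1 = GCPtrMem (plus offDisp),
       ARG2 = iSegReg for segmented accesses; for stores (and SIMD fetches, which pass the
       value by stack reference) the value goes in ARG2 for flat and ARG3 for segmented
       accesses. */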
5341 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
5342 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5343#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5344 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5345 {
5346 /*
5347 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
5348 *
5349         * @note A host register was assigned to the variable for the TlbLookup case above and must not
5350         *       be freed here, or the value loaded into it will not be synced back to the stack slot
5351         *       further down the road, because the variable would no longer know it had a register assigned.
5352 *
5353 * @note For loads it is not required to sync what is in the assigned register with the stack slot
5354 * as it will be overwritten anyway.
5355 */
5356 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
5357 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
5358 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
5359 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
5360 }
5361 else
5362#endif
5363 if (enmOp == kIemNativeEmitMemOp_Store)
5364 {
5365 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
5366 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
5367#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5368 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5369#else
5370 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
5371 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
5372#endif
5373 }
5374
5375 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
5376 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
5377#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5378 fVolGregMask);
5379#else
5380 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
5381#endif
5382
5383 if (iSegReg != UINT8_MAX)
5384 {
5385 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
5386 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
5387 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
5388 }
5389
5390 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
5391 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5392
5393 /* Done setting up parameters, make the call. */
5394 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
5395
5396 /*
5397 * Put the result in the right register if this is a fetch.
5398 */
5399 if (enmOp != kIemNativeEmitMemOp_Store)
5400 {
5401#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5402 if ( cbMem == sizeof(RTUINT128U)
5403 || cbMem == sizeof(RTUINT256U))
5404 {
5405 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
5406
5407 /* Sync the value on the stack with the host register assigned to the variable. */
5408 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
5409 }
5410 else
5411#endif
5412 {
5413 Assert(idxRegValueFetch == pVarValue->idxReg);
5414 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
5415 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
5416 }
5417 }
5418
5419#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5420 /* Restore variables and guest shadow registers to volatile registers. */
5421 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
5422 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
5423#endif
5424
5425#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5426 if (pReNative->Core.offPc)
5427 {
5428 /*
5429 * Time to restore the program counter to its original value.
5430 */
5431 /* Allocate a temporary PC register. */
5432 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5433
5434 /* Restore the original value. */
5435 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5436 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5437
5438 /* Free and flush the PC register. */
5439 iemNativeRegFreeTmp(pReNative, idxPcReg);
5440 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
5441 }
5442#endif
5443
5444#ifdef IEMNATIVE_WITH_TLB_LOOKUP
5445 if (!TlbState.fSkip)
5446 {
5447 /* end of TlbMiss - Jump to the done label. */
5448 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
5449 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
5450
5451 /*
5452 * TlbLookup:
5453 */
5454 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
5455 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
5456 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
5457
5458 /*
5459 * Emit code to do the actual storing / fetching.
5460 */
5461 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
5462# ifdef VBOX_WITH_STATISTICS
5463 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
5464 enmOp == kIemNativeEmitMemOp_Store
5465                                                  ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
5466                                                  : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
5467# endif
5468 switch (enmOp)
5469 {
5470 case kIemNativeEmitMemOp_Store:
5471 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
5472 {
5473 switch (cbMem)
5474 {
5475 case 1:
5476 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5477 break;
5478 case 2:
5479 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5480 break;
5481 case 4:
5482 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5483 break;
5484 case 8:
5485 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5486 break;
5487#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5488 case sizeof(RTUINT128U):
5489 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5490 break;
5491 case sizeof(RTUINT256U):
5492 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5493 break;
5494#endif
5495 default:
5496 AssertFailed();
5497 }
5498 }
5499 else
5500 {
5501 switch (cbMem)
5502 {
5503 case 1:
5504 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
5505 idxRegMemResult, TlbState.idxReg1);
5506 break;
5507 case 2:
5508 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
5509 idxRegMemResult, TlbState.idxReg1);
5510 break;
5511 case 4:
5512 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
5513 idxRegMemResult, TlbState.idxReg1);
5514 break;
5515 case 8:
5516 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
5517 idxRegMemResult, TlbState.idxReg1);
5518 break;
5519 default:
5520 AssertFailed();
5521 }
5522 }
5523 break;
5524
5525 case kIemNativeEmitMemOp_Fetch:
5526 case kIemNativeEmitMemOp_Fetch_Zx_U16:
5527 case kIemNativeEmitMemOp_Fetch_Zx_U32:
5528 case kIemNativeEmitMemOp_Fetch_Zx_U64:
5529 switch (cbMem)
5530 {
5531 case 1:
5532 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5533 break;
5534 case 2:
5535 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5536 break;
5537 case 4:
5538 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5539 break;
5540 case 8:
5541 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5542 break;
5543#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5544 case sizeof(RTUINT128U):
5545 /*
5546 * No need to sync back the register with the stack, this is done by the generic variable handling
5547 * code if there is a register assigned to a variable and the stack must be accessed.
5548 */
5549 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5550 break;
5551 case sizeof(RTUINT256U):
5552 /*
5553 * No need to sync back the register with the stack, this is done by the generic variable handling
5554 * code if there is a register assigned to a variable and the stack must be accessed.
5555 */
5556 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5557 break;
5558#endif
5559 default:
5560 AssertFailed();
5561 }
5562 break;
5563
5564 case kIemNativeEmitMemOp_Fetch_Sx_U16:
5565 Assert(cbMem == 1);
5566 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5567 break;
5568
5569 case kIemNativeEmitMemOp_Fetch_Sx_U32:
5570 Assert(cbMem == 1 || cbMem == 2);
5571 if (cbMem == 1)
5572 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5573 else
5574 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5575 break;
5576
5577 case kIemNativeEmitMemOp_Fetch_Sx_U64:
5578 switch (cbMem)
5579 {
5580 case 1:
5581 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5582 break;
5583 case 2:
5584 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5585 break;
5586 case 4:
5587 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5588 break;
5589 default:
5590 AssertFailed();
5591 }
5592 break;
5593
5594 default:
5595 AssertFailed();
5596 }
5597
5598 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
5599
5600 /*
5601 * TlbDone:
5602 */
5603 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
5604
5605 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
5606
5607# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5608 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
5609 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5610# endif
5611 }
5612#else
5613 RT_NOREF(fAlignMask, idxLabelTlbMiss);
5614#endif
5615
5616 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
5617 iemNativeVarRegisterRelease(pReNative, idxVarValue);
5618 return off;
5619}
5620
5621
5622
5623/*********************************************************************************************************************************
5624* Memory fetches (IEM_MEM_FETCH_XXX). *
5625*********************************************************************************************************************************/
5626
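/* Each of the fetch macros below maps directly onto iemNativeEmitMemFetchStoreDataCommon:
   cbMem is the access size, the alignment mask is cbMem - 1 (0 for byte accesses), and the
   helper passed in is what the TLB-miss path calls. */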
5627/* 8-bit segmented: */
5628#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
5629 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
5630 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5631 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5632
5633#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5634 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5635 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5636 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5637
5638#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5639 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5640 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5641 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5642
5643#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5644 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5645 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5646 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5647
5648#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5649 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5650 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5651 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5652
5653#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5654 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5655 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5656 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5657
5658#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5659 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5660 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5661 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5662
5663/* 16-bit segmented: */
5664#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5665 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5666 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5667 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5668
5669#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5670 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5671 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5672 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5673
5674#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5675 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5676 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5677 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5678
5679#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5680 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5681 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5682 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5683
5684#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5685 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5686 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5687 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5688
5689#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5690 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5691 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5692 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5693
5694
5695/* 32-bit segmented: */
5696#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5697 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5698 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5699 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5700
5701#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5702 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5703 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5704 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5705
5706#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5707 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5708 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5709 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5710
5711#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5712 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5713 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5714 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5715
5716AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
5717#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
5718 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
5719 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
5720 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5721
5722
5723/* 64-bit segmented: */
5724#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5725 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5726 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5727 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5728
5729AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
5730#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
5731 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
5732 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
5733 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5734
5735
5736/* 8-bit flat: */
5737#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
5738 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
5739 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5740 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5741
5742#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
5743 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5744 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5745 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5746
5747#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
5748 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5749 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5750 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5751
5752#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
5753 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5754 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5755 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5756
5757#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
5758 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5759 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5760 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5761
5762#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
5763 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5764 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5765 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5766
5767#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
5768 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5769 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5770 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5771
5772
5773/* 16-bit flat: */
5774#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
5775 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5776 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5777 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5778
5779#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
5780 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5781 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5782 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5783
5784#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
5785 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5786 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5787 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5788
5789#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
5790 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5791 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5792 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5793
5794#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
5795 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5796 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5797 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5798
5799#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
5800 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5801 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5802 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5803
5804/* 32-bit flat: */
5805#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
5806 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5807 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5808 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5809
5810#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
5811 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5812 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5813 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5814
5815#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
5816 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5817 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5818 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5819
5820#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
5821 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5822 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5823 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5824
5825#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
5826 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
5827 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
5828 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5829
5830
5831/* 64-bit flat: */
5832#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
5833 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5834 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5835 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
5836
5837#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
5838 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
5839 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
5840 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
5841
5842#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5843/* 128-bit segmented: */
5844#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
5845 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
5846 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5847 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
5848
5849#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
5850 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
5851 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5852 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
5853
5854AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
5855#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
5856 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, \
5857 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
5858 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
5859
5860#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
5861 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
5862 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5863 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
5864
5865/* 128-bit flat: */
5866#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
5867 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
5868 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5869 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
5870
5871#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
5872 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
5873 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5874 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
5875
5876#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
5877 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
5878 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
5879 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
5880
5881#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
5882 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
5883 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5884 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
5885
5886/* 256-bit segmented: */
5887#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
5888 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
5889 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5890 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
5891
5892#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
5893 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
5894 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5895 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
5896
5897#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
5898 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
5899 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5900 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
5901
5902
5903/* 256-bit flat: */
5904#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
5905 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
5906 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5907 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
5908
5909#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
5910 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
5911 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5912 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
5913
5914#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
5915 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
5916 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5917 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
5918#endif
5919
5920
5921/*********************************************************************************************************************************
5922* Memory stores (IEM_MEM_STORE_XXX). *
5923*********************************************************************************************************************************/
5924
5925#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
5926 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
5927 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
5928 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
5929
5930#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
5931 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
5932 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
5933 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
5934
5935#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
5936 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
5937 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
5938 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
5939
5940#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
5941 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
5942 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
5943 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
5944
5945
5946#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
5947 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
5948 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
5949 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
5950
5951#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
5952 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
5953 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
5954 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
5955
5956#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
5957 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
5958 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
5959 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
5960
5961#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
5962 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
5963 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
5964 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
5965
5966
5967#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
5968 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
5969 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
5970
5971#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
5972 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
5973 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
5974
5975#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
5976 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
5977 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
5978
5979#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
5980 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
5981 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
5982
5983
5984#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
5985 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
5986 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
5987
5988#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
5989 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
5990 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
5991
5992#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
5993 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
5994 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
5995
5996#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
5997 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
5998 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
5999
6000/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
6001 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
6002DECL_INLINE_THROW(uint32_t)
6003iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
6004 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
6005{
6006 /*
6007 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
6008 * to do the grunt work.
6009 */
6010 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
6011 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
6012 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
6013 pfnFunction, idxInstr);
6014 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
6015 return off;
6016}
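/* The constant is wrapped in an immediate-kind variable, which lets the TLB-hit path of the
   common worker use the store-immediate emitters directly; only the TLB-miss helper call
   needs to materialize the value in a register. */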
6017
6018
6019#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6020# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
6021 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
6022 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6023 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
6024
6025# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
6026 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
6027 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6028 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
6029
6030# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
6031 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
6032 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6033 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
6034
6035# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
6036 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
6037 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6038 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
6039
6040
6041# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
6042 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
6043 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6044 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
6045
6046# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
6047 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
6048 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6049 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
6050
6051# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
6052 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
6053 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6054 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
6055
6056# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
6057 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
6058 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6059 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
6060#endif
6061
6062
6063
6064/*********************************************************************************************************************************
6065* Stack Accesses. *
6066*********************************************************************************************************************************/
6067/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
6068#define IEM_MC_PUSH_U16(a_u16Value) \
6069 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
6070 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
6071#define IEM_MC_PUSH_U32(a_u32Value) \
6072 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
6073 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
6074#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
6075 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
6076 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
6077#define IEM_MC_PUSH_U64(a_u64Value) \
6078 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
6079 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
6080
6081#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
6082 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
6083 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
6084#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
6085 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
6086 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
6087#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
6088 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
6089 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
6090
6091#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
6092 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
6093 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
6094#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
6095 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
6096 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
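/* Example decoding of the layout parameter: RT_MAKE_U32_FROM_U8(16, 32, 0, 0) describes a
   16-bit value pushed on a flat 32-bit stack that is not a segment register; the first byte
   argument is the value width in bits, the second the flat stack width (0 = segmented), and
   the third the segment-register flag used for the 16-bit write special case below. */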
6097
6098
6099DECL_FORCE_INLINE_THROW(uint32_t)
6100iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6101{
6102 /* Use16BitSp: */
6103#ifdef RT_ARCH_AMD64
6104 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
6105 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6106#else
6107 /* sub regeff, regrsp, #cbMem */
6108 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
6109 /* and regeff, regeff, #0xffff */
6110 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6111 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
6112    /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp into bits 15:0 of idxRegRsp. */
6113 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
6114#endif
6115 return off;
6116}
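/* Example: with SP=0x0002 a 4 byte push wraps the effective stack address around to
   SS:0xFFFE, and only bits 15:0 of RSP are updated; bits 63:16 are left untouched, which is
   what the masking and bit-field insert above take care of. */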
6117
6118
6119DECL_FORCE_INLINE(uint32_t)
6120iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6121{
6122 /* Use32BitSp: */
6123 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
6124 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6125 return off;
6126}
6127
6128
6129/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
6130DECL_INLINE_THROW(uint32_t)
6131iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
6132 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
6133{
6134 /*
6135 * Assert sanity.
6136 */
6137 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
6138 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
6139#ifdef VBOX_STRICT
6140 if (RT_BYTE2(cBitsVarAndFlat) != 0)
6141 {
6142 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6143 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6144 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6145 Assert( pfnFunction
6146 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
6147 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
6148 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
6149 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
6150 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
6151 : UINT64_C(0xc000b000a0009000) ));
6152 }
6153 else
6154 Assert( pfnFunction
6155 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
6156 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
6157 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
6158 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
6159 : UINT64_C(0xc000b000a0009000) ));
6160#endif
6161
6162#ifdef VBOX_STRICT
6163 /*
6164 * Check that the fExec flags we've got make sense.
6165 */
6166 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6167#endif
6168
6169 /*
6170 * To keep things simple we have to commit any pending writes first as we
6171 * may end up making calls.
6172 */
6173 /** @todo we could postpone this till we make the call and reload the
6174 * registers after returning from the call. Not sure if that's sensible or
6175 * not, though. */
6176 off = iemNativeRegFlushPendingWrites(pReNative, off);
6177
6178 /*
6179 * First we calculate the new RSP and the effective stack pointer value.
6180 * For 64-bit mode and flat 32-bit these two are the same.
6181 * (Code structure is very similar to that of PUSH)
6182     * (Code structure is very similar to that of POP.)
6183 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
6184 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
6185 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
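    /* This models the Intel behaviour where a segment register push with a 32-bit or 64-bit
       operand size only writes the low 16 bits of the stack slot (the stack pointer is still
       decremented by the full operand size), hence the reduced access size for that case. */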
6186 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
6187 ? cbMem : sizeof(uint16_t);
6188 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
6189 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
6190 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
6191 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
6192 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
6193 if (cBitsFlat != 0)
6194 {
6195 Assert(idxRegEffSp == idxRegRsp);
6196 Assert(cBitsFlat == 32 || cBitsFlat == 64);
6197 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
6198 if (cBitsFlat == 64)
6199 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
6200 else
6201 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
6202 }
6203 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
6204 {
6205 Assert(idxRegEffSp != idxRegRsp);
6206 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
6207 kIemNativeGstRegUse_ReadOnly);
6208#ifdef RT_ARCH_AMD64
6209 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6210#else
6211 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6212#endif
6213 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
6214 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
6215 offFixupJumpToUseOtherBitSp = off;
6216 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6217 {
6218 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
6219 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6220 }
6221 else
6222 {
6223 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
6224 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6225 }
6226 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6227 }
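    /* The branch above emits inline the SP update variant expected for the current CPU mode
       and leaves a fixup jump (offFixupJumpToUseOtherBitSp) to the other variant, which is
       emitted further down after the TLB-lookup jump and jumps back to SpUpdateEnd, so both
       variants converge here. */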
6228 /* SpUpdateEnd: */
6229 uint32_t const offLabelSpUpdateEnd = off;
6230
6231 /*
6232 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
6233 * we're skipping lookup).
6234 */
6235 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
6236 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
6237 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6238 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
6239 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6240 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6241 : UINT32_MAX;
6242 uint8_t const idxRegValue = !TlbState.fSkip
6243 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6244 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
6245 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
6246 : UINT8_MAX;
6247 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
6248
6249
6250 if (!TlbState.fSkip)
6251 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6252 else
6253 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
6254
6255 /*
6256 * Use16BitSp:
6257 */
6258 if (cBitsFlat == 0)
6259 {
6260#ifdef RT_ARCH_AMD64
6261 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6262#else
6263 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6264#endif
6265 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
6266 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6267 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6268 else
6269 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6270 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
6271 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6272 }
6273
6274 /*
6275 * TlbMiss:
6276 *
6277 * Call helper to do the pushing.
6278 */
6279 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
6280
6281#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6282 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6283#else
6284 RT_NOREF(idxInstr);
6285#endif
6286
6287 /* Save variables in volatile registers. */
6288 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6289 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
6290 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
6291 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
6292 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6293
6294 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
6295 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
6296 {
6297 /* Swap them using ARG0 as temp register: */
6298 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
6299 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
6300 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
6301 }
6302 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
6303 {
6304 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
6305 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
6306 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6307
6308 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
6309 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
6310 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6311 }
6312 else
6313 {
6314 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
6315 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6316
6317 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
6318 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
6319 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
6320 }
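/* Note: the ordering in the three cases above keeps either source register from
   being clobbered before it has been read - the swap via ARG0 covers the case
   where the value already sits in ARG1 while the effective SP sits in ARG2. */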
6321
6322 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6323 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6324
6325 /* Done setting up parameters, make the call. */
6326 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6327
6328 /* Restore variables and guest shadow registers to volatile registers. */
6329 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6330 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6331
6332#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6333 if (!TlbState.fSkip)
6334 {
6335 /* end of TlbMiss - Jump to the done label. */
6336 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6337 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6338
6339 /*
6340 * TlbLookup:
6341 */
6342 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
6343 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6344
6345 /*
6346 * Emit code to do the actual storing / fetching.
6347 */
6348 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
6349# ifdef VBOX_WITH_STATISTICS
6350 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6351 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
6352# endif
6353 if (idxRegValue != UINT8_MAX)
6354 {
6355 switch (cbMemAccess)
6356 {
6357 case 2:
6358 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6359 break;
6360 case 4:
6361 if (!fIsIntelSeg)
6362 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6363 else
6364 {
6365 /* Intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
6366 PUSH FS in real mode, so we have to try to emulate that here.
6367 We borrow the now unused idxReg1 from the TLB lookup code here. */
6368 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
6369 kIemNativeGstReg_EFlags);
6370 if (idxRegEfl != UINT8_MAX)
6371 {
6372#ifdef RT_ARCH_AMD64
6373 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
6374 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
6375 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6376#else
6377 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
6378 off, TlbState.idxReg1, idxRegEfl,
6379 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6380#endif
6381 iemNativeRegFreeTmp(pReNative, idxRegEfl);
6382 }
6383 else
6384 {
6385 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
6386 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
6387 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
6388 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6389 }
6390 /* ASSUMES the upper half of idxRegValue is ZERO. */
6391 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
6392 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
6393 }
6394 break;
6395 case 8:
6396 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6397 break;
6398 default:
6399 AssertFailed();
6400 }
6401 }
6402 else
6403 {
6404 switch (cbMemAccess)
6405 {
6406 case 2:
6407 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
6408 idxRegMemResult, TlbState.idxReg1);
6409 break;
6410 case 4:
6411 Assert(!fIsSegReg);
6412 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
6413 idxRegMemResult, TlbState.idxReg1);
6414 break;
6415 case 8:
6416 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
6417 break;
6418 default:
6419 AssertFailed();
6420 }
6421 }
6422
6423 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6424 TlbState.freeRegsAndReleaseVars(pReNative);
6425
6426 /*
6427 * TlbDone:
6428 *
6429 * Commit the new RSP value.
6430 */
6431 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6432 }
6433#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
6434
6435#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6436 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
6437#endif
6438 iemNativeRegFreeTmp(pReNative, idxRegRsp);
6439 if (idxRegEffSp != idxRegRsp)
6440 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
6441
6442 /* The value variable is implicitly flushed. */
6443 if (idxRegValue != UINT8_MAX)
6444 iemNativeVarRegisterRelease(pReNative, idxVarValue);
6445 iemNativeVarFreeLocal(pReNative, idxVarValue);
6446
6447 return off;
6448}
6449
6450
6451
6452/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
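/* Example decoding (see iemNativeEmitStackPopGReg below): IEM_MC_FLAT64_POP_GREG_U16
   passes RT_MAKE_U32_FROM_U8(16, 64, 0, 0), i.e. RT_BYTE1() = 16 gives cbMem = 2
   bytes to pop, while RT_BYTE2() = 64 selects a flat 64-bit stack where RSP is
   used directly as the effective stack pointer. */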
6453#define IEM_MC_POP_GREG_U16(a_iGReg) \
6454 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
6455 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
6456#define IEM_MC_POP_GREG_U32(a_iGReg) \
6457 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
6458 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
6459#define IEM_MC_POP_GREG_U64(a_iGReg) \
6460 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
6461 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
6462
6463#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
6464 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
6465 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
6466#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
6467 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
6468 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
6469
6470#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
6471 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
6472 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
6473#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
6474 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
6475 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
6476
6477
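/** Emits the 16-bit-SP variant of the stack-pop SP update.
 * Worked example: with RSP=0x0001fffe and cbMem=2, EffSp ends up as 0x0000fffe
 * for the load, while RSP becomes 0x00010000 - the addition wraps within bits
 * 15:0 and bits 63:16 are left untouched on both the AMD64 and ARM64 paths. */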
6478DECL_FORCE_INLINE_THROW(uint32_t)
6479iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
6480 uint8_t idxRegTmp)
6481{
6482 /* Use16BitSp: */
6483#ifdef RT_ARCH_AMD64
6484 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6485 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
6486 RT_NOREF(idxRegTmp);
6487#else
6488 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
6489 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
6490 /* add tmp, regrsp, #cbMem */
6491 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
6492 /* and tmp, tmp, #0xffff */
6493 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6494 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
6495 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
6496 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
6497#endif
6498 return off;
6499}
6500
6501
6502DECL_FORCE_INLINE(uint32_t)
6503iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6504{
6505 /* Use32BitSp: */
6506 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6507 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
6508 return off;
6509}
6510
6511
6512/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
6513DECL_INLINE_THROW(uint32_t)
6514iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
6515 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
6516{
6517 /*
6518 * Assert sanity.
6519 */
6520 Assert(idxGReg < 16);
6521#ifdef VBOX_STRICT
6522 if (RT_BYTE2(cBitsVarAndFlat) != 0)
6523 {
6524 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6525 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6526 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6527 Assert( pfnFunction
6528 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
6529 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
6530 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
6531 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
6532 : UINT64_C(0xc000b000a0009000) ));
6533 }
6534 else
6535 Assert( pfnFunction
6536 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
6537 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
6538 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
6539 : UINT64_C(0xc000b000a0009000) ));
6540#endif
6541
6542#ifdef VBOX_STRICT
6543 /*
6544 * Check that the fExec flags we've got make sense.
6545 */
6546 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6547#endif
6548
6549 /*
6550 * To keep things simple we have to commit any pending writes first as we
6551 * may end up making calls.
6552 */
6553 off = iemNativeRegFlushPendingWrites(pReNative, off);
6554
6555 /*
6556 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
6557 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
6558 * directly as the effective stack pointer.
6559 * (Code structure is very similar to that of PUSH)
6560 */
6561 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
6562 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
6563 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
6564 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
6565 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
6566 /** @todo can do a better job picking the register here. For cbMem >= 4 this
6567 * will be the resulting register value. */
6568 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
6569
6570 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
6571 if (cBitsFlat != 0)
6572 {
6573 Assert(idxRegEffSp == idxRegRsp);
6574 Assert(cBitsFlat == 32 || cBitsFlat == 64);
6575 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
6576 }
6577 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
6578 {
6579 Assert(idxRegEffSp != idxRegRsp);
6580 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
6581 kIemNativeGstRegUse_ReadOnly);
6582#ifdef RT_ARCH_AMD64
6583 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6584#else
6585 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6586#endif
6587 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
6588 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
6589 offFixupJumpToUseOtherBitSp = off;
6590 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6591 {
6592/** @todo can skip idxRegRsp updating when popping ESP. */
6593 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
6594 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6595 }
6596 else
6597 {
6598 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
6599 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
6600 }
6601 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6602 }
6603 /* SpUpdateEnd: */
6604 uint32_t const offLabelSpUpdateEnd = off;
6605
6606 /*
6607 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
6608 * we're skipping lookup).
6609 */
6610 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
6611 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
6612 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6613 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
6614 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6615 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6616 : UINT32_MAX;
6617
6618 if (!TlbState.fSkip)
6619 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6620 else
6621 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
6622
6623 /*
6624 * Use16BitSp:
6625 */
6626 if (cBitsFlat == 0)
6627 {
6628#ifdef RT_ARCH_AMD64
6629 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6630#else
6631 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6632#endif
6633 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
6634 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6635 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
6636 else
6637 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6638 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
6639 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6640 }
6641
6642 /*
6643 * TlbMiss:
6644 *
6645 * Call helper to do the popping.
6646 */
6647 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
6648
6649#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6650 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6651#else
6652 RT_NOREF(idxInstr);
6653#endif
6654
6655 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6656 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
6657 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
6658 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6659
6660
6661 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
6662 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
6663 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6664
6665 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6666 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6667
6668 /* Done setting up parameters, make the call. */
6669 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6670
6671 /* Move the return register content to idxRegMemResult. */
6672 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
6673 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
6674
6675 /* Restore variables and guest shadow registers to volatile registers. */
6676 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6677 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6678
6679#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6680 if (!TlbState.fSkip)
6681 {
6682 /* end of TlbMiss - Jump to the done label. */
6683 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6684 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6685
6686 /*
6687 * TlbLookup:
6688 */
6689 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
6690 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6691
6692 /*
6693 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
6694 */
6695 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6696# ifdef VBOX_WITH_STATISTICS
6697 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6698 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
6699# endif
6700 switch (cbMem)
6701 {
6702 case 2:
6703 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6704 break;
6705 case 4:
6706 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6707 break;
6708 case 8:
6709 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6710 break;
6711 default:
6712 AssertFailed();
6713 }
6714
6715 TlbState.freeRegsAndReleaseVars(pReNative);
6716
6717 /*
6718 * TlbDone:
6719 *
6720 * Set the new RSP value (FLAT accesses need to calculate it first) and
6721 * commit the popped register value.
6722 */
6723 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6724 }
6725#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
6726
6727 if (idxGReg != X86_GREG_xSP)
6728 {
6729 /* Set the register. */
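/* For 32-bit and 64-bit pops the loaded value becomes the whole new register
   value (the 32-bit load is zero-extended in the host register), so the result
   register can simply be marked as the new shadow copy of the guest GPR; a
   16-bit pop only replaces the low word and needs a read-modify-write merge. */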
6730 if (cbMem >= sizeof(uint32_t))
6731 {
6732#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6733 AssertMsg( pReNative->idxCurCall == 0
6734 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
6735 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
6736#endif
6737 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
6738#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6739 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
6740#endif
6741#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6742 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
6743 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6744#endif
6745 }
6746 else
6747 {
6748 Assert(cbMem == sizeof(uint16_t));
6749 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
6750 kIemNativeGstRegUse_ForUpdate);
6751 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
6752#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6753 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6754#endif
6755 iemNativeRegFreeTmp(pReNative, idxRegDst);
6756 }
6757
6758 /* Complete RSP calculation for FLAT mode. */
6759 if (idxRegEffSp == idxRegRsp)
6760 {
6761 if (cBitsFlat == 64)
6762 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
6763 else
6764 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
6765 }
6766 }
6767 else
6768 {
6769 /* We're popping RSP, ESP or SP. Only the latter needs a bit of extra work, of course. */
6770 if (cbMem == sizeof(uint64_t))
6771 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
6772 else if (cbMem == sizeof(uint32_t))
6773 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
6774 else
6775 {
6776 if (idxRegEffSp == idxRegRsp)
6777 {
6778 if (cBitsFlat == 64)
6779 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
6780 else
6781 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
6782 }
6783 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
6784 }
6785 }
6786
6787#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6788 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
6789#endif
6790
6791 iemNativeRegFreeTmp(pReNative, idxRegRsp);
6792 if (idxRegEffSp != idxRegRsp)
6793 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
6794 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6795
6796 return off;
6797}
6798
6799
6800
6801/*********************************************************************************************************************************
6802* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
6803*********************************************************************************************************************************/
6804
6805#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6806 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6807 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
6808 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
6809
6810#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6811 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6812 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
6813 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
6814
6815#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6816 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6817 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
6818 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
6819
6820#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6821 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6822 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
6823 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
6824
6825
6826#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6827 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6828 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6829 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
6830
6831#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6832 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6833 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6834 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
6835
6836#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6837 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6838 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6839 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
6840
6841#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6842 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6843 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6844 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
6845
6846#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6847 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
6848 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6849 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
6850
6851
6852#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6853 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6854 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6855 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
6856
6857#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6858 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6859 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6860 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
6861
6862#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6863 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6864 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6865 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
6866
6867#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6868 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6869 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6870 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
6871
6872#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6873 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
6874 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6875 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
6876
6877
6878#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6879 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6880 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6881 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
6882
6883#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6884 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6885 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6886 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
6887#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6888 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6889 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6890 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
6891
6892#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6893 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6894 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6895 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
6896
6897#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6898 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
6899 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6900 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
6901
6902
6903#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6904 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
6905 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6906 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
6907
6908#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6909 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
6910 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
6911 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
6912
6913
6914#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6915 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6916 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6917 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
6918
6919#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6920 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6921 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6922 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
6923
6924#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6925 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6926 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6927 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
6928
6929#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6930 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6931 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6932 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
6933
6934
6935
6936#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6937 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6938 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
6939 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
6940
6941#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6942 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6943 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
6944 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
6945
6946#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6947 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6948 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
6949 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
6950
6951#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6952 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6953 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
6954 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
6955
6956
6957#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
6958 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6959 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6960 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
6961
6962#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
6963 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6964 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6965 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
6966
6967#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
6968 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6969 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6970 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
6971
6972#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
6973 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6974 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6975 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
6976
6977#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
6978 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
6979 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6980 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
6981
6982
6983#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
6984 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6985 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6986 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
6987
6988#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
6989 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6990 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6991 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
6992
6993#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
6994 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6995 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6996 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
6997
6998#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
6999 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7000 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7001 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
7002
7003#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
7004 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
7005 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7006 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
7007
7008
7009#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7010 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7011 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7012 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
7013
7014#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7015 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7016 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7017 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
7018
7019#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7020 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7021 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7022 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
7023
7024#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7025 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7026 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7027 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
7028
7029#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
7030 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
7031 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7032 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
7033
7034
7035#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
7036 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
7037 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7038 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
7039
7040#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
7041 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
7042 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
7043 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
7044
7045
7046#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7047 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7048 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7049 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
7050
7051#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7052 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7053 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7054 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
7055
7056#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7057 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7058 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7059 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
7060
7061#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7062 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7063 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7064 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
7065
7066
7067DECL_INLINE_THROW(uint32_t)
7068iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
7069 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
7070 uintptr_t pfnFunction, uint8_t idxInstr)
7071{
7072 /*
7073 * Assert sanity.
7074 */
7075 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
7076 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
7077 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
7078 && pVarMem->cbVar == sizeof(void *),
7079 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7080
7081 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
7082 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
7083 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
7084 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
7085 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7086
7087 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7088 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7089 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7090 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7091 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7092
7093 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
7094
7095 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7096
7097#ifdef VBOX_STRICT
7098# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
7099 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
7100 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
7101 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
7102 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
7103# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
7104 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
7105 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
7106 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
7107
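/* E.g. for IEM_MC_MEM_MAP_U32_RW above, fAccess is IEM_ACCESS_DATA_RW, so
   IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32) resolves to
   (uintptr_t)iemNativeHlpMemMapDataU32Rw - matching the pfnFunction passed in
   by that macro. */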
7108 if (iSegReg == UINT8_MAX)
7109 {
7110 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7111 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7112 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7113 switch (cbMem)
7114 {
7115 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
7116 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
7117 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
7118 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
7119 case 10:
7120 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
7121 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
7122 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
7123 break;
7124 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
7125# if 0
7126 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
7127 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
7128# endif
7129 default: AssertFailed(); break;
7130 }
7131 }
7132 else
7133 {
7134 Assert(iSegReg < 6);
7135 switch (cbMem)
7136 {
7137 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
7138 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
7139 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
7140 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
7141 case 10:
7142 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
7143 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
7144 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
7145 break;
7146 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
7147# if 0
7148 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
7149 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
7150# endif
7151 default: AssertFailed(); break;
7152 }
7153 }
7154# undef IEM_MAP_HLP_FN
7155# undef IEM_MAP_HLP_FN_NO_AT
7156#endif
7157
7158#ifdef VBOX_STRICT
7159 /*
7160 * Check that the fExec flags we've got make sense.
7161 */
7162 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7163#endif
7164
7165 /*
7166 * To keep things simple we have to commit any pending writes first as we
7167 * may end up making calls.
7168 */
7169 off = iemNativeRegFlushPendingWrites(pReNative, off);
7170
7171#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7172 /*
7173 * Move/spill/flush stuff out of call-volatile registers.
7174 * This is the easy way out. We could contain this to the tlb-miss branch
7175 * by saving and restoring active stuff here.
7176 */
7177 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
7178 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7179#endif
7180
7181 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
7182 while the tlb-miss codepath will temporarily put it on the stack.
7183 Set the type to stack here so we don't need to do it twice below. */
7184 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
7185 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
7186 /** @todo use a tmp register from TlbState, since they'll be free after tlb
7187 * lookup is done. */
7188
7189 /*
7190 * Define labels and allocate the result register (trying for the return
7191 * register if we can).
7192 */
7193 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7194 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7195 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
7196 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
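/* Preferring the call-return register here saves the extra move after the
   TlbMiss helper call below (see the idxRegMemResult != IEMNATIVE_CALL_RET_GREG
   check following the call). */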
7197 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
7198 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7199 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7200 : UINT32_MAX;
7201//off=iemNativeEmitBrk(pReNative, off, 0);
7202 /*
7203 * Jump to the TLB lookup code.
7204 */
7205 if (!TlbState.fSkip)
7206 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7207
7208 /*
7209 * TlbMiss:
7210 *
7211 * Call helper to do the fetching.
7212 * We flush all guest register shadow copies here.
7213 */
7214 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7215
7216#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7217 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7218#else
7219 RT_NOREF(idxInstr);
7220#endif
7221
7222#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7223 /* Save variables in volatile registers. */
7224 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
7225 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7226#endif
7227
7228 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
7229 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*offAddend*/,
7230#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7231 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7232#else
7233 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7234#endif
7235
7236 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
7237 if (iSegReg != UINT8_MAX)
7238 {
7239 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7240 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
7241 }
7242
7243 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
7244 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
7245 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
7246
7247 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7248 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7249
7250 /* Done setting up parameters, make the call. */
7251 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7252
7253 /*
7254 * Put the output in the right registers.
7255 */
7256 Assert(idxRegMemResult == pVarMem->idxReg);
7257 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
7258 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
7259
7260#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7261 /* Restore variables and guest shadow registers to volatile registers. */
7262 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7263 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7264#endif
7265
7266 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
7267 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
7268
7269#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7270 if (!TlbState.fSkip)
7271 {
7272 /* end of TlbMiss - Jump to the done label. */
7273 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7274 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7275
7276 /*
7277 * TlbLookup:
7278 */
7279 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
7280 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7281# ifdef VBOX_WITH_STATISTICS
7282 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
7283 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
7284# endif
7285
7286 /* [idxVarUnmapInfo] = 0; */
7287 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
7288
7289 /*
7290 * TlbDone:
7291 */
7292 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7293
7294 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7295
7296# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7297 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7298 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7299# endif
7300 }
7301#else
7302 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
7303#endif
7304
7305 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
7306 iemNativeVarRegisterRelease(pReNative, idxVarMem);
7307
7308 return off;
7309}
7310
7311
7312#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
7313 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
7314 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
7315
7316#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
7317 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
7318 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
7319
7320#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
7321 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
7322 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
7323
7324#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
7325 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
7326 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
7327
7328DECL_INLINE_THROW(uint32_t)
7329iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
7330 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
7331{
7332 /*
7333 * Assert sanity.
7334 */
7335 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
7336#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
7337 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
7338#endif
7339 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
7340 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
7341 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
7342#ifdef VBOX_STRICT
7343 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
7344 {
7345 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
7346 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
7347 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
7348 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
7349 case IEM_ACCESS_TYPE_WRITE:
7350 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
7351 case IEM_ACCESS_TYPE_READ:
7352 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
7353 default: AssertFailed();
7354 }
7355#else
7356 RT_NOREF(fAccess);
7357#endif
7358
7359 /*
7360 * To keep things simple we have to commit any pending writes first as we
7361 * may end up making calls (there shouldn't be any at this point, so this
7362 * is just for consistency).
7363 */
7364 /** @todo we could postpone this till we make the call and reload the
7365 * registers after returning from the call. Not sure if that's sensible or
7366 * not, though. */
7367 off = iemNativeRegFlushPendingWrites(pReNative, off);
7368
7369 /*
7370 * Move/spill/flush stuff out of call-volatile registers.
7371 *
7372 * We exclude any register holding the bUnmapInfo variable, as we'll be
7373 * checking it after returning from the call and will free it afterwards.
7374 */
7375 /** @todo save+restore active registers and maybe guest shadows in miss
7376 * scenario. */
7377 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
7378 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
7379
7380 /*
7381 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
7382 * to call the unmap helper function.
7383 *
7384 * The likelihood of it being zero is higher than that of a TLB hit when doing
7385 * the mapping, as a TLB miss for a well-aligned and unproblematic memory
7386 * access should also end up with a mapping that won't need special unmapping.
7387 */
7388 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
7389 * should speed up things for the pure interpreter as well when TLBs
7390 * are enabled. */
7391#ifdef RT_ARCH_AMD64
7392 if (pVarUnmapInfo->idxReg == UINT8_MAX)
7393 {
7394 /* test byte [rbp - xxx], 0ffh */
7395 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7396 pbCodeBuf[off++] = 0xf6;
7397 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
7398 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
7399 pbCodeBuf[off++] = 0xff;
7400 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7401 }
7402 else
7403#endif
7404 {
7405 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
7406 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
7407 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
7408 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
7409 }
7410 uint32_t const offJmpFixup = off;
7411 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
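/* Note: the TLB-hit path of iemNativeEmitMemMapCommon stores zero into the
   bUnmapInfo variable, so this jz normally skips straight to the fixup target
   below and the unmap helper is only called for mappings set up by the TlbMiss
   helper. */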
7412
7413 /*
7414 * Call the unmap helper function.
7415 */
7416#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
7417 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7418#else
7419 RT_NOREF(idxInstr);
7420#endif
7421
7422 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
7423 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
7424 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7425
7426 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7427 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7428
7429 /* Done setting up parameters, make the call. */
7430 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7431
7432 /* The bUnmapInfo variable is implicitly freed by these MCs. */
7433 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
7434
7435 /*
7436 * Done, just fixup the jump for the non-call case.
7437 */
7438 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
7439
7440 return off;
7441}
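
/*
 * Illustrative sketch of what the emitter above produces on AMD64 for the
 * stack-slot case (register names and the stack offset are placeholders, not
 * literal output):
 *
 *      test    byte [rbp - <bUnmapInfo slot>], 0ffh
 *      jz      .done                   ; bUnmapInfo == 0: nothing to commit/unmap
 *      mov     <ARG1>, <bUnmapInfo>    ; first call argument, loaded from the stack variable
 *      mov     <ARG0>, <pVCpu>         ; IEMNATIVE_REG_FIXED_PVMCPU
 *      call    iemNativeHlpMemCommitAndUnmap<Rw|Wo|Ro>
 *  .done:
 */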
7442
7443
7444
7445/*********************************************************************************************************************************
7446* State and Exceptions *
7447*********************************************************************************************************************************/
7448
7449#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7450#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7451
7452#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7453#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7454#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7455
7456#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7457#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7458#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7459
7460
7461DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
7462{
7463 /** @todo this needs a lot more work later. */
7464 RT_NOREF(pReNative, fForChange);
7465 return off;
7466}
7467
7468
7469
7470/*********************************************************************************************************************************
7471* Emitters for FPU related operations. *
7472*********************************************************************************************************************************/
7473
7474#define IEM_MC_FETCH_FCW(a_u16Fcw) \
7475 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
7476
7477/** Emits code for IEM_MC_FETCH_FCW. */
7478DECL_INLINE_THROW(uint32_t)
7479iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
7480{
7481 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7482 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7483
7484 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7485
7486 /* Allocate a temporary FCW register. */
7487 /** @todo eliminate extra register */
7488 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
7489 kIemNativeGstRegUse_ReadOnly);
7490
7491 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
7492
7493 /* Free but don't flush the FCW register. */
7494 iemNativeRegFreeTmp(pReNative, idxFcwReg);
7495 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7496
7497 return off;
7498}
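
/*
 * For context, a microcode block consumes this statement roughly as follows
 * (illustrative sketch only; the IEM_MC_BEGIN arguments and the consumer of
 * u16Fcw are placeholders, not taken from this file):
 *
 *      IEM_MC_BEGIN(...);
 *      IEM_MC_LOCAL(uint16_t, u16Fcw);
 *      IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ();
 *      IEM_MC_FETCH_FCW(u16Fcw);
 *      ... store or otherwise use u16Fcw ...
 *      IEM_MC_END();
 */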
7499
7500
7501#define IEM_MC_FETCH_FSW(a_u16Fsw) \
7502 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
7503
7504/** Emits code for IEM_MC_FETCH_FSW. */
7505DECL_INLINE_THROW(uint32_t)
7506iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
7507{
7508 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7509 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7510
7511 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
7512 /* Allocate a temporary FSW register. */
7513 /** @todo eliminate extra register */
7514 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
7515 kIemNativeGstRegUse_ReadOnly);
7516
7517 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
7518
7519 /* Free but don't flush the FSW register. */
7520 iemNativeRegFreeTmp(pReNative, idxFswReg);
7521 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7522
7523 return off;
7524}
7525
7526
7527
7528#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7529
7530
7531/*********************************************************************************************************************************
7532* Emitters for SSE/AVX specific operations. *
7533*********************************************************************************************************************************/
7534
7535#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
7536 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
7537
7538/** Emits code for IEM_MC_COPY_XREG_U128. */
7539DECL_INLINE_THROW(uint32_t)
7540iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
7541{
7542 /* This is a nop if the source and destination registers are the same. */
7543 if (iXRegDst != iXRegSrc)
7544 {
7545 /* Allocate destination and source register. */
7546 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
7547 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
7548 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
7549 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7550
7551 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
7552
7553 /* Free but don't flush the source and destination register. */
7554 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7555 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7556 }
7557
7558 return off;
7559}
7560
7561
7562#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
7563 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
7564
7565/** Emits code for IEM_MC_FETCH_XREG_U128. */
7566DECL_INLINE_THROW(uint32_t)
7567iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
7568{
7569 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7570 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
7571
7572 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7573 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7574
7575 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
7576
7577 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
7578
7579 /* Free but don't flush the source register. */
7580 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7581 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
7582
7583 return off;
7584}
7585
7586
7587#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
7588 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
7589
7590/** Emits code for IEM_MC_FETCH_XREG_U64. */
7591DECL_INLINE_THROW(uint32_t)
7592iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
7593{
7594 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7595 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7596
7597 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7598 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7599
7600 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7601 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7602
7603 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
7604
7605 /* Free but don't flush the source register. */
7606 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7607 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7608
7609 return off;
7610}
7611
7612
7613 #define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
7614 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
7615
7616/** Emits code for IEM_MC_FETCH_XREG_U32. */
7617DECL_INLINE_THROW(uint32_t)
7618iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
7619{
7620 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7621 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7622
7623 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7624 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7625
7626 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7627 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7628
7629 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
7630
7631 /* Free but don't flush the source register. */
7632 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7633 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7634
7635 return off;
7636}
7637
7638
7639 #define IEM_MC_FETCH_XREG_U16(a_u16Value, a_iXReg, a_iWord) \
7640 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u16Value, a_iXReg, a_iWord)
7641
7642/** Emits code for IEM_MC_FETCH_XREG_U16. */
7643DECL_INLINE_THROW(uint32_t)
7644iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
7645{
7646 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7647 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7648
7649 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7650 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7651
7652 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7653 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7654
7655 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
7656
7657 /* Free but don't flush the source register. */
7658 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7659 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7660
7661 return off;
7662}
7663
7664
7665 #define IEM_MC_FETCH_XREG_U8(a_u8Value, a_iXReg, a_iByte) \
7666 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u8Value, a_iXReg, a_iByte)
7667
7668/** Emits code for IEM_MC_FETCH_XREG_U8. */
7669DECL_INLINE_THROW(uint32_t)
7670iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
7671{
7672 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7673 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
7674
7675 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7676 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7677
7678 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7679 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7680
7681 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
7682
7683 /* Free but don't flush the source register. */
7684 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7685 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7686
7687 return off;
7688}
7689
7690
7691#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
7692 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
7693
7694AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7695#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
7696 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
7697
7698
7699/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
7700DECL_INLINE_THROW(uint32_t)
7701iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7702{
7703 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7704 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
7705
7706 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7707 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
7708
7709 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
7710
7711 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
7712
7713 /* Free but don't flush the source register. */
7714 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7715 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
7716
7717 return off;
7718}
7719
7720
7721#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
7722 off = iemNativeEmitSimdStoreXregU64(pReNative, off, a_iXReg, a_u64Value, a_iQWord)
7723
7724/** Emits code for IEM_MC_STORE_XREG_U64. */
7725DECL_INLINE_THROW(uint32_t)
7726iemNativeEmitSimdStoreXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar, uint8_t iQWord)
7727{
7728 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7729 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7730
7731 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7732 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7733
7734 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7735
7736 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQWord);
7737
7738 /* Free but don't flush the source register. */
7739 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7740 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7741
7742 return off;
7743}
7744
7745
7746#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
7747 off = iemNativeEmitSimdStoreXregU32(pReNative, off, a_iXReg, a_u32Value, a_iDWord)
7748
7749/** Emits code for IEM_MC_STORE_XREG_U32. */
7750DECL_INLINE_THROW(uint32_t)
7751iemNativeEmitSimdStoreXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar, uint8_t iDWord)
7752{
7753 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7754 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7755
7756 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7757 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7758
7759 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7760
7761 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iDWord);
7762
7763 /* Free but don't flush the source register. */
7764 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7765 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7766
7767 return off;
7768}
7769
7770
7771#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
7772 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
7773
7774 /** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
7775DECL_INLINE_THROW(uint32_t)
7776iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7777{
7778 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7779 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7780
7781 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7782 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7783
7784 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7785
7786 /* Zero the vector register first, then store the 64-bit value to the low 64 bits. */
7787 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
7788 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
7789
7790 /* Free but don't flush the source register. */
7791 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7792 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7793
7794 return off;
7795}
7796
7797
7798#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
7799 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
7800
7801 /** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
7802DECL_INLINE_THROW(uint32_t)
7803iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7804{
7805 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7806 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7807
7808 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7809 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7810
7811 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7812
7813 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
7814 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
7815 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
7816
7817 /* Free but don't flush the source register. */
7818 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7819 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7820
7821 return off;
7822}
7823
7824
7825#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
7826 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
7827
7828/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
7829DECL_INLINE_THROW(uint32_t)
7830iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst, uint8_t idxSrcVar, uint8_t iDwSrc)
7831{
7832 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7833 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
7834
7835 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7836 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7837
7838 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
7839
7840 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
7841 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
7842
7843 /* Free but don't flush the destination register. */
7844 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7845 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
7846
7847 return off;
7848}
7849
7850
7851#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
7852 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
7853
7854/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
7855DECL_INLINE_THROW(uint32_t)
7856iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
7857{
7858 /*
7859 * The iYRegSrc == iYRegDst case needs to be treated differently here: if iYRegDst gets allocated first for the full write,
7860 * it won't load the actual value from CPUMCTX. Allocating iYRegSrc afterwards would then duplicate the already
7861 * allocated host register for iYRegDst, which contains garbage. This would be caught by the guest register value checking in debug builds.
7862 */
7863 if (iYRegDst != iYRegSrc)
7864 {
7865 /* Allocate destination and source register. */
7866 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
7867 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7868 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
7869 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7870
7871 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
7872 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7873
7874 /* Free but don't flush the source and destination register. */
7875 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7876 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7877 }
7878 else
7879 {
7880 /* This effectively only clears the upper 128-bits of the register. */
7881 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
7882 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
7883
7884 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
7885
7886 /* Free but don't flush the destination register. */
7887 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
7888 }
7889
7890 return off;
7891}
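
/*
 * Semantics note: this matches e.g. a VEX.128 register-to-register move
 * (vmovdqa xmm1, xmm2), which copies the low 128 bits and zeroes bits 255:128
 * of the destination YMM register. For iYRegDst == iYRegSrc only the zeroing
 * of the upper half remains, which is exactly what the else branch above emits.
 */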
7892
7893
7894#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
7895 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
7896
7897/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
7898DECL_INLINE_THROW(uint32_t)
7899iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
7900{
7901 /*
7902 * The iYRegSrc == iYRegDst case needs to be treated differently here: if iYRegDst gets allocated first for the full write,
7903 * it won't load the actual value from CPUMCTX. Allocating iYRegSrc afterwards would then duplicate the already
7904 * allocated host register for iYRegDst, which contains garbage. This would be caught by the guest register value checking in debug builds.
7905 * Since we don't support ZMM registers yet, iYRegSrc == iYRegDst would effectively only clear the bits above 255, so it is just a nop here.
7906 */
7907 if (iYRegDst != iYRegSrc)
7908 {
7909 /* Allocate destination and source register. */
7910 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
7911 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
7912 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
7913 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7914
7915 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
7916
7917 /* Free but don't flush the source and destination register. */
7918 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7919 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7920 }
7921
7922 return off;
7923}
7924
7925
7926#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
7927 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
7928
7929/** Emits code for IEM_MC_FETCH_YREG_U128. */
7930DECL_INLINE_THROW(uint32_t)
7931iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
7932{
7933 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7934 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
7935
7936 Assert(iDQWord <= 1);
7937 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7938 iDQWord == 1
7939 ? kIemNativeGstSimdRegLdStSz_High128
7940 : kIemNativeGstSimdRegLdStSz_Low128,
7941 kIemNativeGstRegUse_ReadOnly);
7942
7943 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7944 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
7945
7946 if (iDQWord == 1)
7947 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
7948 else
7949 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
7950
7951 /* Free but don't flush the source register. */
7952 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7953 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
7954
7955 return off;
7956}
7957
7958
7959#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
7960 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
7961
7962/** Emits code for IEM_MC_FETCH_YREG_U64. */
7963DECL_INLINE_THROW(uint32_t)
7964iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
7965{
7966 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7967 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7968
7969 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7970 iQWord >= 2
7971 ? kIemNativeGstSimdRegLdStSz_High128
7972 : kIemNativeGstSimdRegLdStSz_Low128,
7973 kIemNativeGstRegUse_ReadOnly);
7974
7975 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7976 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7977
7978 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
7979
7980 /* Free but don't flush the source register. */
7981 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7982 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7983
7984 return off;
7985}
7986
7987
7988#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
7989 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
7990
7991/** Emits code for IEM_MC_FETCH_YREG_U32. */
7992DECL_INLINE_THROW(uint32_t)
7993iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
7994{
7995 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7996 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7997
7998 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7999 iDWord >= 4
8000 ? kIemNativeGstSimdRegLdStSz_High128
8001 : kIemNativeGstSimdRegLdStSz_Low128,
8002 kIemNativeGstRegUse_ReadOnly);
8003
8004 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8005 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8006
8007 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
8008
8009 /* Free but don't flush the source register. */
8010 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8011 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8012
8013 return off;
8014}
8015
8016
8017#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
8018 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
8019
8020/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
8021DECL_INLINE_THROW(uint32_t)
8022iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
8023{
8024 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8025 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
8026
8027 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
8028
8029 /* Free but don't flush the register. */
8030 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
8031
8032 return off;
8033}
8034
8035
8036#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
8037 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
8038
8039/** Emits code for IEM_MC_STORE_YREG_U128. */
8040DECL_INLINE_THROW(uint32_t)
8041iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
8042{
8043 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8044 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8045
8046 Assert(iDQword <= 1);
8047 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8048 iDQword == 0
8049 ? kIemNativeGstSimdRegLdStSz_Low128
8050 : kIemNativeGstSimdRegLdStSz_High128,
8051 kIemNativeGstRegUse_ForFullWrite);
8052
8053 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8054
8055 if (iDQword == 0)
8056 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
8057 else
8058 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
8059
8060 /* Free but don't flush the source register. */
8061 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8062 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8063
8064 return off;
8065}
8066
8067
8068#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
8069 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
8070
8071/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
8072DECL_INLINE_THROW(uint32_t)
8073iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8074{
8075 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8076 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8077
8078 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8079 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8080
8081 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8082
8083 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
8084 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8085
8086 /* Free but don't flush the source register. */
8087 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8088 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8089
8090 return off;
8091}
8092
8093
8094#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
8095 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
8096
8097/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
8098DECL_INLINE_THROW(uint32_t)
8099iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8100{
8101 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8102 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
8103
8104 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8105 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8106
8107 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8108
8109 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8110 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8111
8112 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8113 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8114
8115 return off;
8116}
8117
8118
8119#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
8120 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
8121
8122/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
8123DECL_INLINE_THROW(uint32_t)
8124iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8125{
8126 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8127 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
8128
8129 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8130 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8131
8132 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8133
8134 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8135 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8136
8137 /* Free but don't flush the source register. */
8138 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8139 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8140
8141 return off;
8142}
8143
8144
8145#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
8146 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
8147
8148/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
8149DECL_INLINE_THROW(uint32_t)
8150iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8151{
8152 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8153 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8154
8155 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8156 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8157
8158 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8159
8160 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8161 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8162
8163 /* Free but don't flush the source register. */
8164 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8165 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8166
8167 return off;
8168}
8169
8170
8171#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
8172 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
8173
8174/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
8175DECL_INLINE_THROW(uint32_t)
8176iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8177{
8178 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8179 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8180
8181 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8182 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8183
8184 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8185
8186 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8187 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8188
8189 /* Free but don't flush the source register. */
8190 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8191 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8192
8193 return off;
8194}
8195
8196
8197#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
8198 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
8199
8200/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
8201DECL_INLINE_THROW(uint32_t)
8202iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8203{
8204 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8205 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
8206
8207 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8208 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8209
8210 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8211
8212 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8213
8214 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8215 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8216
8217 return off;
8218}
8219
8220
8221#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
8222 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
8223
8224/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
8225DECL_INLINE_THROW(uint32_t)
8226iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8227{
8228 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8229 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
8230
8231 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8232 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8233
8234 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8235
8236 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8237
8238 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8239 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8240
8241 return off;
8242}
8243
8244
8245#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
8246 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
8247
8248/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
8249DECL_INLINE_THROW(uint32_t)
8250iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8251{
8252 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8253 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8254
8255 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8256 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8257
8258 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8259
8260 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8261
8262 /* Free but don't flush the source register. */
8263 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8264 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8265
8266 return off;
8267}
8268
8269
8270#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
8271 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
8272
8273/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
8274DECL_INLINE_THROW(uint32_t)
8275iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8276{
8277 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8278 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8279
8280 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8281 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8282
8283 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8284
8285 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8286
8287 /* Free but don't flush the source register. */
8288 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8289 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8290
8291 return off;
8292}
8293
8294
8295#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
8296 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
8297
8298/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
8299DECL_INLINE_THROW(uint32_t)
8300iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8301{
8302 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8303 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8304
8305 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8306 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8307
8308 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8309
8310 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
8311
8312 /* Free but don't flush the source register. */
8313 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8314 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8315
8316 return off;
8317}
8318
8319
8320#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
8321 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
8322
8323/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
8324DECL_INLINE_THROW(uint32_t)
8325iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8326{
8327 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8328 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8329
8330 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8331 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8332
8333 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8334
8335 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
8336 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
8337
8338 /* Free but don't flush the source register. */
8339 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8340 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8341
8342 return off;
8343}
8344
8345
8346#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
8347 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
8348
8349/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
8350DECL_INLINE_THROW(uint32_t)
8351iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8352{
8353 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8354 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8355
8356 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8357 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8358
8359 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8360
8361 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
8362 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
8363
8364 /* Free but don't flush the source register. */
8365 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8366 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8367
8368 return off;
8369}
8370
8371
8372#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
8373 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
8374
8375/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
8376DECL_INLINE_THROW(uint32_t)
8377iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
8378{
8379 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8380 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8381
8382 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8383 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8384 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
8385 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8386 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8387
8388 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
8389 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
8390 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8391
8392 /* Free but don't flush the source and destination registers. */
8393 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
8394 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8395 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8396
8397 return off;
8398}
8399
8400
8401#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
8402 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
8403
8404/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
8405DECL_INLINE_THROW(uint32_t)
8406iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
8407{
8408 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8409 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8410
8411 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8412 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8413 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
8414 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8415 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8416
8417 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
8418 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
8419 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8420
8421 /* Free but don't flush the source and destination registers. */
8422 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
8423 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8424 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8425
8426 return off;
8427}
8428
8429
8430#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
8431 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
8432
8433
8434/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
8435DECL_INLINE_THROW(uint32_t)
8436iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
8437{
8438 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8439 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8440
8441 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
8442 if (bImm8Mask & RT_BIT(0))
8443 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
8444 if (bImm8Mask & RT_BIT(1))
8445 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
8446 if (bImm8Mask & RT_BIT(2))
8447 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
8448 if (bImm8Mask & RT_BIT(3))
8449 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
8450
8451 /* Free but don't flush the destination register. */
8452 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8453
8454 return off;
8455}
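
/*
 * Example: with a_bMask = 0x5 (bits 0 and 2 set) the code above emits two
 * element-zeroing instructions, clearing dwords 0 and 2 of the XMM register
 * while dwords 1 and 3 keep their values.
 */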
8456
8457
8458#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
8459 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
8460
8461
8462/** Emits code for IEM_MC_FETCH_YREG_U256. */
8463DECL_INLINE_THROW(uint32_t)
8464iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
8465{
8466 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8467 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
8468
8469 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
8470 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
8471 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8472
8473 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
8474
8475 /* Free but don't flush the source register. */
8476 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8477 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8478
8479 return off;
8480}
8481
8482
8483#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
8484 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
8485
8486
8487/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX. */
8488DECL_INLINE_THROW(uint32_t)
8489iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
8490{
8491 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8492 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
8493
8494 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8495 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8496 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
8497
8498 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
8499
8500 /* Free but don't flush the source register. */
8501 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8502 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8503
8504 return off;
8505}
8506
8507
8508#define IEM_MC_SSE_UPDATE_MXCSR(a_fMxcsr) \
8509 off = iemNativeEmitSimdSseUpdateMxcsr(pReNative, off, a_fMxcsr)
8510
8511/** Emits code for IEM_MC_SSE_UPDATE_MXCSR. */
8512DECL_INLINE_THROW(uint32_t)
8513iemNativeEmitSimdSseUpdateMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxMxCsrVar)
8514{
8515 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxMxCsrVar);
8516 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxMxCsrVar, sizeof(uint32_t));
8517
8518 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ForUpdate);
8519 uint8_t const idxVarRegMxCsr = iemNativeVarRegisterAcquire(pReNative, idxMxCsrVar, &off, true /*fInitialized*/);
8520 uint8_t const idxVarRegTmp = iemNativeRegAllocTmp(pReNative, &off);
8521
8522 /** @todo r=aeichner I think it would be safe to spare the temporary register and trash
8523 * the variable MXCSR register as it isn't used afterwards in the microcode block anyway.
8524 * Needs verification though, so play it safe for now.
8525 */
8526 /* mov tmp, varmxcsr */
8527 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarRegTmp, idxVarRegMxCsr);
8528 /* and tmp, X86_MXCSR_XCPT_FLAGS */
8529 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarRegTmp, X86_MXCSR_XCPT_FLAGS);
8530 /* or mxcsr, tmp */
8531 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxVarRegTmp);
8532
8533 /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
8534 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
8535
8536 /* Free but don't flush the MXCSR register. */
8537 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
8538 iemNativeVarRegisterRelease(pReNative, idxMxCsrVar);
8539 iemNativeRegFreeTmp(pReNative, idxVarRegTmp);
8540
8541 return off;
8542}
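
/*
 * The sequence above boils down to this C sketch (member path as used in the
 * writeback above, fMxcsr being the value of the a_fMxcsr variable):
 *
 *      pVCpu->cpum.GstCtx.XState.x87.MXCSR |= fMxcsr & X86_MXCSR_XCPT_FLAGS;
 *
 * Only the exception status flags from the variable are merged in; the
 * control and mask bits of the variable are ignored.
 */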
8543
8544
8545#define IEM_MC_STORE_SSE_RESULT(a_SseData, a_iXmmReg) \
8546 off = iemNativeEmitSimdSseStoreResult(pReNative, off, a_SseData, a_iXmmReg)
8547
8548/** Emits code for IEM_MC_STORE_SSE_RESULT. */
8549DECL_INLINE_THROW(uint32_t)
8550iemNativeEmitSimdSseStoreResult(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSseDataVar, uint8_t iXReg)
8551{
8552 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSseDataVar);
8553 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSseDataVar, sizeof(IEMSSERESULT));
8554
8555 /** @todo r=aeichner We probably need to rework this MC statement and the users to make thing more efficient. */
8556 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8557 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
8558 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ForUpdate);
8559 uint8_t const idxVarRegResAddr = iemNativeRegAllocTmp(pReNative, &off);
8560 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
8561
8562 off = iemNativeEmitLoadArgGregWithVarAddr(pReNative, off, idxVarRegResAddr, idxSseDataVar, false /*fFlushShadows*/);
8563
8564 /* Update MXCSR. */
8565 off = iemNativeEmitLoadGprByGprU32(pReNative, off, idxRegTmp, idxVarRegResAddr, RT_UOFFSETOF_DYN(IEMSSERESULT, MXCSR));
8566 /* tmp &= X86_MXCSR_XCPT_FLAGS. */
8567 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS);
8568 /* mxcsr |= tmp */
8569 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
8570
8571 /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
8572 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
8573
8574 /* Update the value if there is no unmasked exception. */
8575 /* tmp = mxcsr */
8576 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
8577 /* tmp &= X86_MXCSR_XCPT_MASK */
8578 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
8579 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
8580 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
8581 /* tmp = ~tmp */
8582 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
8583 /* tmp &= mxcsr */
8584 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
8585
8586 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS);
8587 uint32_t offFixup = off;
8588 off = iemNativeEmitJnzToFixed(pReNative, off, off);
8589 AssertCompileMemberSize(IEMSSERESULT, uResult, sizeof(RTFLOAT128U));
8590 off = iemNativeEmitLoadVecRegByGprU128(pReNative, off, idxSimdRegDst, idxVarRegResAddr, RT_UOFFSETOF_DYN(IEMSSERESULT, uResult));
8591 iemNativeFixupFixedJump(pReNative, offFixup, off);
8592
8593 /* Free but don't flush the shadowed register. */
8594 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8595 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
8596 iemNativeRegFreeTmp(pReNative, idxVarRegResAddr);
8597 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8598
8599 return off;
8600}
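
/*
 * The commit condition emitted above corresponds to this C sketch (fMxcsr
 * being the merged MXCSR value; member names are taken from the structures
 * referenced above, and the snippet is explanatory rather than drop-in code):
 *
 *      uint32_t const fXcptMask = (fMxcsr & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT;
 *      if (!(fMxcsr & ~fXcptMask & X86_MXCSR_XCPT_FLAGS))
 *          ... copy pSseData->uResult into XMM register iXReg ...
 *
 * I.e. the result is only written back when every raised exception flag is
 * also masked; otherwise the destination register is left untouched.
 */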
8601
8602
8603/*********************************************************************************************************************************
8604* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
8605*********************************************************************************************************************************/
8606
8607/**
8608 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX.
8609 */
8610DECL_INLINE_THROW(uint32_t)
8611iemNativeEmitCallSseAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
8612{
8613 /*
8614 * Need to do the FPU preparation.
8615 */
8616 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
8617
8618 /*
8619 * Do all the call setup and cleanup.
8620 */
8621 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS);
8622
8623 /*
8624 * Load the XState::x87 pointer.
8625 */
8626 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, kIemNativeGstRegRef_X87, 0 /*idxRegInClass*/);
8627
8628 /*
8629 * Make the call.
8630 */
8631 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
8632
8633 return off;
8634}
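
/*
 * Resulting call shape (sketch; assumes IEM_SSE_AIMPL_HIDDEN_ARGS is 1, i.e.
 * the single hidden x87 state pointer loaded into ARG0 above):
 *
 *      pfnAImpl(<pointer to guest x87/XMM state>, <a0>, <a1>[, <a2>]);
 *
 * The visible IEM_MC arguments are therefore shifted up by the hidden
 * argument count when they get assigned to host argument registers.
 */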
8635
8636
8637#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
8638 off = iemNativeEmitCallSseAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
8639
8640/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
8641DECL_INLINE_THROW(uint32_t)
8642iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
8643{
8644 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8645 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8646 return iemNativeEmitCallSseAImplCommon(pReNative, off, pfnAImpl, 2);
8647}


#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
    off = iemNativeEmitCallSseAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))

/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAImplCommon(pReNative, off, pfnAImpl, 3);
}


/*********************************************************************************************************************************
* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX                                                                                         *
*********************************************************************************************************************************/

/**
 * Common worker for IEM_MC_CALL_AVX_AIMPL_XXX.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
{
    /*
     * Need to do the FPU preparation.
     */
    off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);

    /*
     * Do all the call setup and cleanup.
     */
    off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_AVX_AIMPL_HIDDEN_ARGS, IEM_AVX_AIMPL_HIDDEN_ARGS);

    /*
     * Load the XState pointer.
     */
    off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, kIemNativeGstRegRef_XState, 0 /*idxRegInClass*/);

    /*
     * Make the call.
     */
    off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);

    return off;
}
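

/* Note: unlike the SSE worker above, which passes only the legacy x87/XMM area, this
   worker hands the AVX workers a pointer to the whole XState save area -- presumably
   because the upper YMM halves live outside the legacy FXSAVE image. */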


#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
    off = iemNativeEmitCallAvxAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))

/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallAvxAImplCommon(pReNative, off, pfnAImpl, 2);
}


#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
    off = iemNativeEmitCallAvxAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))

/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallAvxAImplCommon(pReNative, off, pfnAImpl, 3);
}
#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */


/*********************************************************************************************************************************
* Include instruction emitters.                                                                                                  *
*********************************************************************************************************************************/
#include "target-x86/IEMAllN8veEmit-x86.h"
