VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@ 104030

Last change on this file since 104030 was 104021, checked in by vboxsync, 11 months ago

VMM/IEM: Implement native emitters for IEM_MC_CALL_AVX_AIMPL_2() and IEM_MC_CALL_AVX_AIMPL_3(), bugref:10614

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 411.3 KB
1/* $Id: IEMAllN8veRecompFuncs.h 104021 2024-03-24 16:27:34Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused configs.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81
82
83/*********************************************************************************************************************************
84* Code emitters for flushing pending guest register writes and sanity checks *
85*********************************************************************************************************************************/
86
87#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
88# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
89DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
90{
91 /* Compare the shadow with the context value; they should match. */
92 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
93 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
94 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
95 return off;
96}
97# endif
98#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
99
100/**
101 * Flushes delayed write of a specific guest register.
102 *
103 * This must be called prior to calling CImpl functions and any helpers that use
104 * the guest state (like raising exceptions) and such.
105 *
106 * This optimization has not yet been implemented. The first target would be
107 * RIP updates, since these are the most common ones.
108 */
109DECL_INLINE_THROW(uint32_t)
110iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
111{
112#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
113 /* If for whatever reason it is possible to reference the PC register at some point, we need to do the writeback here first. */
114#endif
115
116#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
117 if ( enmClass == kIemNativeGstRegRef_XReg
118 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
119 {
120 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
121 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
122 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
123
124 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
125 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
126 }
127#endif
128 RT_NOREF(pReNative, enmClass, idxReg);
129 return off;
130}
131
132
133
134/*********************************************************************************************************************************
135* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
136*********************************************************************************************************************************/
137
138#undef IEM_MC_BEGIN /* unused */
139#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
140 { \
141 Assert(pReNative->Core.bmVars == 0); \
142 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
143 Assert(pReNative->Core.bmStack == 0); \
144 pReNative->fMc = (a_fMcFlags); \
145 pReNative->fCImpl = (a_fCImplFlags); \
146 pReNative->cArgsX = (a_cArgsIncludingHidden)
147
148/** We have to get to the end in recompilation mode, as otherwise we won't
149 * generate code for all the IEM_MC_IF_XXX branches. */
150#define IEM_MC_END() \
151 iemNativeVarFreeAll(pReNative); \
152 } return off
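/* Taken together: IEM_MC_BEGIN_EX opens a plain C block and IEM_MC_END closes it
   and returns the current code buffer offset, so a recompiled MC block body is
   simply a sequence of 'off = ...' emitter statements bracketed by the two macros. */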
153
154
155
156/*********************************************************************************************************************************
157* Native Emitter Support. *
158*********************************************************************************************************************************/
159
160#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
161
162#define IEM_MC_NATIVE_ELSE() } else {
163
164#define IEM_MC_NATIVE_ENDIF() } ((void)0)
165
166
167#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
168 off = a_fnEmitter(pReNative, off)
169
170#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
171 off = a_fnEmitter(pReNative, off, (a0))
172
173#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
174 off = a_fnEmitter(pReNative, off, (a0), (a1))
175
176#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
177 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
178
179#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
180 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
181
182#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
183 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
184
185#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
186 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
187
188#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
189 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
190
191#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
192 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
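/* Illustrative usage sketch (the emitter name iemNativeEmit_SomeOp below is made up
   for illustration and not taken from this file): in an instruction body the
   IF/ELSE/ENDIF macros pick between a hand-written native emitter and the generic
   fallback at recompile time:

       IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64) {
           IEM_MC_NATIVE_EMIT_2(iemNativeEmit_SomeOp, iGprDst, iGprSrc);
       } IEM_MC_NATIVE_ELSE() {
           // generic IEM_MC_* statements for other hosts
       } IEM_MC_NATIVE_ENDIF();

   Each IEM_MC_NATIVE_EMIT_<N> variant simply forwards its N arguments to the given
   emitter together with pReNative and the current code buffer offset. */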
193
194
195
196/*********************************************************************************************************************************
197* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
198*********************************************************************************************************************************/
199
200#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
201 pReNative->fMc = 0; \
202 pReNative->fCImpl = (a_fFlags); \
203 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
204
205
206#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
207 pReNative->fMc = 0; \
208 pReNative->fCImpl = (a_fFlags); \
209 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
210
211DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
212 uint8_t idxInstr, uint64_t a_fGstShwFlush,
213 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
214{
215 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
216}
217
218
219#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
220 pReNative->fMc = 0; \
221 pReNative->fCImpl = (a_fFlags); \
222 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
223 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
224
225DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
226 uint8_t idxInstr, uint64_t a_fGstShwFlush,
227 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
228{
229 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
230}
231
232
233#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
234 pReNative->fMc = 0; \
235 pReNative->fCImpl = (a_fFlags); \
236 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
237 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
238
239DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
240 uint8_t idxInstr, uint64_t a_fGstShwFlush,
241 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
242 uint64_t uArg2)
243{
244 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
245}
246
247
248
249/*********************************************************************************************************************************
250* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
251*********************************************************************************************************************************/
252
253/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
254 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
255DECL_INLINE_THROW(uint32_t)
256iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
257{
258 /*
259 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
260 * return with a special status code and make the execution loop deal with
261 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
262 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
263 * could continue w/o interruption, it probably will drop into the
264 * debugger, so it is not worth the effort of trying to service it here; we
265 * just lump it in with the handling of the others.
266 *
267 * To simplify the code and the register state management even more (wrt
268 * the immediate in the AND operation), we always update the flags and skip
269 * the conditional jump associated with the extra check.
270 */
271 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
272 <= UINT32_MAX);
273#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
274 AssertMsg( pReNative->idxCurCall == 0
275 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
276 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
277#endif
278
279 /*
280 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label
281 * any pending register writes must be flushed.
282 */
283 off = iemNativeRegFlushPendingWrites(pReNative, off);
284
285 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
286 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
287 true /*fSkipLivenessAssert*/);
288 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
289 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
290 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
291 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
292 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
293
294 /* Free but don't flush the EFLAGS register. */
295 iemNativeRegFreeTmp(pReNative, idxEflReg);
296
297 return off;
298}
299
300
301/** The VINF_SUCCESS dummy. */
302template<int const a_rcNormal>
303DECL_FORCE_INLINE(uint32_t)
304iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
305{
306 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
307 if (a_rcNormal != VINF_SUCCESS)
308 {
309#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
310 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
311#else
312 RT_NOREF_PV(idxInstr);
313#endif
314
315 /* As this code returns from the TB any pending register writes must be flushed. */
316 off = iemNativeRegFlushPendingWrites(pReNative, off);
317
318 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
319 }
320 return off;
321}
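/* Because a_rcNormal is a compile-time template argument, the body above is dead
   code for the common VINF_SUCCESS instantiation and gets eliminated; only the
   VINF_IEM_REEXEC_BREAK instantiations emit the instruction-counter store, the
   pending-write flush and the jump to the ReturnBreak label. */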
322
323
324#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
325 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
326 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
327
328#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
329 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
330 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
331 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
332
333/** Same as iemRegAddToRip64AndFinishingNoFlags. */
334DECL_INLINE_THROW(uint32_t)
335iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
336{
337#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
338# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
339 if (!pReNative->Core.offPc)
340 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
341# endif
342
343 /* Allocate a temporary PC register. */
344 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
345
346 /* Perform the addition and store the result. */
347 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
348 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
349
350 /* Free but don't flush the PC register. */
351 iemNativeRegFreeTmp(pReNative, idxPcReg);
352#endif
353
354#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
355 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
356
357 pReNative->Core.offPc += cbInstr;
358# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
359 off = iemNativePcAdjustCheck(pReNative, off);
360# endif
361 if (pReNative->cCondDepth)
362 off = iemNativeEmitPcWriteback(pReNative, off);
363 else
364 pReNative->Core.cInstrPcUpdateSkipped++;
365#endif
366
367 return off;
368}
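/* With IEMNATIVE_WITH_DELAYED_PC_UPDATING only pReNative->Core.offPc is advanced
   here; the actual RIP store into CPUMCTX is deferred until a writeback is forced
   (e.g. when inside a conditional block, see the cCondDepth check above) and each
   skipped store is counted in cInstrPcUpdateSkipped.  The EIP and IP variants
   below follow the same pattern. */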
369
370
371#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
372 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
373 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
374
375#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
376 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
377 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
378 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
379
380/** Same as iemRegAddToEip32AndFinishingNoFlags. */
381DECL_INLINE_THROW(uint32_t)
382iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
383{
384#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
385# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
386 if (!pReNative->Core.offPc)
387 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
388# endif
389
390 /* Allocate a temporary PC register. */
391 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
392
393 /* Perform the addition and store the result. */
394 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
395 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
396
397 /* Free but don't flush the PC register. */
398 iemNativeRegFreeTmp(pReNative, idxPcReg);
399#endif
400
401#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
402 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
403
404 pReNative->Core.offPc += cbInstr;
405# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
406 off = iemNativePcAdjustCheck(pReNative, off);
407# endif
408 if (pReNative->cCondDepth)
409 off = iemNativeEmitPcWriteback(pReNative, off);
410 else
411 pReNative->Core.cInstrPcUpdateSkipped++;
412#endif
413
414 return off;
415}
416
417
418#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
419 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
420 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
421
422#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
423 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
424 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
425 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
426
427/** Same as iemRegAddToIp16AndFinishingNoFlags. */
428DECL_INLINE_THROW(uint32_t)
429iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
430{
431#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
432# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
433 if (!pReNative->Core.offPc)
434 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
435# endif
436
437 /* Allocate a temporary PC register. */
438 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
439
440 /* Perform the addition and store the result. */
441 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
442 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
443 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
444
445 /* Free but don't flush the PC register. */
446 iemNativeRegFreeTmp(pReNative, idxPcReg);
447#endif
448
449#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
450 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
451
452 pReNative->Core.offPc += cbInstr;
453# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
454 off = iemNativePcAdjustCheck(pReNative, off);
455# endif
456 if (pReNative->cCondDepth)
457 off = iemNativeEmitPcWriteback(pReNative, off);
458 else
459 pReNative->Core.cInstrPcUpdateSkipped++;
460#endif
461
462 return off;
463}
464
465
466
467/*********************************************************************************************************************************
468* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
469*********************************************************************************************************************************/
470
471#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
472 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
473 (a_enmEffOpSize), pCallEntry->idxInstr); \
474 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
475
476#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
477 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
478 (a_enmEffOpSize), pCallEntry->idxInstr); \
479 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
480 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
481
482#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
483 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
484 IEMMODE_16BIT, pCallEntry->idxInstr); \
485 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
486
487#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
488 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
489 IEMMODE_16BIT, pCallEntry->idxInstr); \
490 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
491 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
492
493#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
494 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
495 IEMMODE_64BIT, pCallEntry->idxInstr); \
496 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
497
498#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
499 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
500 IEMMODE_64BIT, pCallEntry->idxInstr); \
501 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
502 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
503
504/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
505 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
506 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
507DECL_INLINE_THROW(uint32_t)
508iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
509 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
510{
511 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
512
513 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
514 off = iemNativeRegFlushPendingWrites(pReNative, off);
515
516#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
517 Assert(pReNative->Core.offPc == 0);
518
519 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
520#endif
521
522 /* Allocate a temporary PC register. */
523 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
524
525 /* Perform the addition. */
526 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
527
528 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
529 {
530 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
531 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
532 }
533 else
534 {
535 /* Just truncate the result to 16-bit IP. */
536 Assert(enmEffOpSize == IEMMODE_16BIT);
537 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
538 }
539 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
540
541 /* Free but don't flush the PC register. */
542 iemNativeRegFreeTmp(pReNative, idxPcReg);
543
544 return off;
545}
546
547
548#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
549 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
550 (a_enmEffOpSize), pCallEntry->idxInstr); \
551 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
552
553#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
554 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
555 (a_enmEffOpSize), pCallEntry->idxInstr); \
556 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
557 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
558
559#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
560 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
561 IEMMODE_16BIT, pCallEntry->idxInstr); \
562 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
563
564#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
565 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
566 IEMMODE_16BIT, pCallEntry->idxInstr); \
567 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
568 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
569
570#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
571 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
572 IEMMODE_32BIT, pCallEntry->idxInstr); \
573 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
574
575#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
576 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
577 IEMMODE_32BIT, pCallEntry->idxInstr); \
578 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
579 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
580
581/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
582 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
583 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
584DECL_INLINE_THROW(uint32_t)
585iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
586 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
587{
588 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
589
590 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
591 off = iemNativeRegFlushPendingWrites(pReNative, off);
592
593#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
594 Assert(pReNative->Core.offPc == 0);
595
596 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
597#endif
598
599 /* Allocate a temporary PC register. */
600 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
601
602 /* Perform the addition. */
603 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
604
605 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
606 if (enmEffOpSize == IEMMODE_16BIT)
607 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
608
609 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
610/** @todo we can skip this in 32-bit FLAT mode. */
611 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
612
613 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
614
615 /* Free but don't flush the PC register. */
616 iemNativeRegFreeTmp(pReNative, idxPcReg);
617
618 return off;
619}
620
621
622#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
623 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
624 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
625
626#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
627 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
628 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
629 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
630
631#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
632 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
633 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
634
635#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
636 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
637 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
638 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
639
640#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
641 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
642 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
643
644#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
645 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
646 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
647 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
648
649/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
650DECL_INLINE_THROW(uint32_t)
651iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
652 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
653{
654 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
655 off = iemNativeRegFlushPendingWrites(pReNative, off);
656
657#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
658 Assert(pReNative->Core.offPc == 0);
659
660 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
661#endif
662
663 /* Allocate a temporary PC register. */
664 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
665
666 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
667 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
668 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
669 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
670 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
671
672 /* Free but don't flush the PC register. */
673 iemNativeRegFreeTmp(pReNative, idxPcReg);
674
675 return off;
676}
677
678
679
680/*********************************************************************************************************************************
681* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
682*********************************************************************************************************************************/
683
684/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
685#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
686 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
687
688/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
689#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
690 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
691
692/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
693#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
694 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
695
696/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
697 * clears flags. */
698#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
699 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
700 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
701
702/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
703 * clears flags. */
704#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
705 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
706 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
707
708/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
709 * clears flags. */
710#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
711 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
712 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
713
714#undef IEM_MC_SET_RIP_U16_AND_FINISH
715
716
717/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
718#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
719 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
720
721/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
722#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
723 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
724
725/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
726 * clears flags. */
727#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
728 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
729 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
730
731/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
732 * and clears flags. */
733#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
734 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
735 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
736
737#undef IEM_MC_SET_RIP_U32_AND_FINISH
738
739
740/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
741#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
742 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
743
744/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
745 * and clears flags. */
746#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
747 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
748 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
749
750#undef IEM_MC_SET_RIP_U64_AND_FINISH
751
752
753/** Same as iemRegRipJumpU16AndFinishNoFlags,
754 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
755DECL_INLINE_THROW(uint32_t)
756iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
757 uint8_t idxInstr, uint8_t cbVar)
758{
759 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
760 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
761
762 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
763 off = iemNativeRegFlushPendingWrites(pReNative, off);
764
765#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
766 Assert(pReNative->Core.offPc == 0);
767
768 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
769#endif
770
771 /* Get a register with the new PC loaded from idxVarPc.
772 Note! This ASSUMES that the high bits of the GPR are zeroed. */
773 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
774
775 /* Check limit (may #GP(0) + exit TB). */
776 if (!f64Bit)
777/** @todo we can skip this test in FLAT 32-bit mode. */
778 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
779 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
780 else if (cbVar > sizeof(uint32_t))
781 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
782
783 /* Store the result. */
784 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
785
786 iemNativeVarRegisterRelease(pReNative, idxVarPc);
787 /** @todo implicitly free the variable? */
788
789 return off;
790}
791
792
793
794/*********************************************************************************************************************************
795* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
796*********************************************************************************************************************************/
797
798#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
799 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
800
801/**
802 * Emits code to check if a \#NM exception should be raised.
803 *
804 * @returns New code buffer offset, UINT32_MAX on failure.
805 * @param pReNative The native recompile state.
806 * @param off The code buffer offset.
807 * @param idxInstr The current instruction.
808 */
809DECL_INLINE_THROW(uint32_t)
810iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
811{
812#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
813 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
814
815 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
816 {
817#endif
818 /*
819 * Make sure we don't have any outstanding guest register writes as we may
820 * raise an #NM and all guest registers must be up to date in CPUMCTX.
821 */
822 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
823 off = iemNativeRegFlushPendingWrites(pReNative, off);
824
825#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
826 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
827#else
828 RT_NOREF(idxInstr);
829#endif
830
831 /* Allocate a temporary CR0 register. */
832 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
833 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
834
835 /*
836 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
837 * return raisexcpt();
838 */
839 /* Test and jump. */
840 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
841
842 /* Free but don't flush the CR0 register. */
843 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
844
845#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
846 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
847 }
848 else
849 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
850#endif
851
852 return off;
853}
854
855
856#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
857 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
858
859/**
860 * Emits code to check if a \#MF exception should be raised.
861 *
862 * @returns New code buffer offset, UINT32_MAX on failure.
863 * @param pReNative The native recompile state.
864 * @param off The code buffer offset.
865 * @param idxInstr The current instruction.
866 */
867DECL_INLINE_THROW(uint32_t)
868iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
869{
870 /*
871 * Make sure we don't have any outstanding guest register writes as we may
872 * raise an #MF and all guest registers must be up to date in CPUMCTX.
873 */
874 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
875 off = iemNativeRegFlushPendingWrites(pReNative, off);
876
877#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
878 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
879#else
880 RT_NOREF(idxInstr);
881#endif
882
883 /* Allocate a temporary FSW register. */
884 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
885 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
886
887 /*
888 * if ((FSW & X86_FSW_ES) != 0)
889 * return raisexcpt();
890 */
891 /* Test and jump. */
892 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFpuFswReg, X86_FSW_ES, idxLabelRaiseMf);
893
894 /* Free but don't flush the FSW register. */
895 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
896
897 return off;
898}
899
900
901#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
902 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
903
904/**
905 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
906 *
907 * @returns New code buffer offset, UINT32_MAX on failure.
908 * @param pReNative The native recompile state.
909 * @param off The code buffer offset.
910 * @param idxInstr The current instruction.
911 */
912DECL_INLINE_THROW(uint32_t)
913iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
914{
915#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
916 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
917
918 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
919 {
920#endif
921 /*
922 * Make sure we don't have any outstanding guest register writes as we may
923 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
924 */
925 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
926 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
927
928#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
929 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
930#else
931 RT_NOREF(idxInstr);
932#endif
933
934 /* Allocate a temporary CR0 and CR4 register. */
935 uint8_t const idxLabelRaiseSseRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseRelated);
936 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
937 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
938 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
939
940 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
941#ifdef RT_ARCH_AMD64
942 /*
943 * We do a modified test here:
944 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
945 * else { goto RaiseSseRelated; }
946 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
947 * all targets except the 386, and since the 386 doesn't support SSE,
948 * this should be a safe assumption.
949 */
950 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
951 //pCodeBuf[off++] = 0xcc;
952 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
953 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
954 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
955 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
956 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
957 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseSseRelated, kIemNativeInstrCond_ne);
958
959#elif defined(RT_ARCH_ARM64)
960 /*
961 * We do a modified test here:
962 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
963 * else { goto RaiseSseRelated; }
964 */
965 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
966 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
967 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
968 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
969 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
970 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
971 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
972 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
973 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
974 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
975 idxLabelRaiseSseRelated);
976
977#else
978# error "Port me!"
979#endif
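/* Worked example for the AMD64 variant above (values picked for illustration):
   with CR4.OSFXSR=1, CR0.EM=0 and CR0.TS=0 the temporary register ends up holding
       ((CR4 & X86_CR4_OSFXSR) | CR0) & (X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR)
         = X86_CR4_OSFXSR,
   and XORing with X86_CR4_OSFXSR gives zero, so execution stays on the likely
   path.  Any of EM=1, TS=1 or OSFXSR=0 leaves a non-zero result and the generated
   code jumps to the RaiseSseRelated label. */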
980
981 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
982 iemNativeRegFreeTmp(pReNative, idxTmpReg);
983 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
984 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
985
986#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
987 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
988 }
989 else
990 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
991#endif
992
993 return off;
994}
995
996
997#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
998 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
999
1000/**
1001 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
1002 *
1003 * @returns New code buffer offset, UINT32_MAX on failure.
1004 * @param pReNative The native recompile state.
1005 * @param off The code buffer offset.
1006 * @param idxInstr The current instruction.
1007 */
1008DECL_INLINE_THROW(uint32_t)
1009iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1010{
1011#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1012 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
1013
1014 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
1015 {
1016#endif
1017 /*
1018 * Make sure we don't have any outstanding guest register writes as we may
1019 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
1020 */
1021 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
1022 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
1023
1024#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1025 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1026#else
1027 RT_NOREF(idxInstr);
1028#endif
1029
1030 /* Allocate a temporary CR0, CR4 and XCR0 register. */
1031 uint8_t const idxLabelRaiseAvxRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseAvxRelated);
1032 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
1033 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
1034 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
1035 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1036
1037 /*
1038 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
1039 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
1040 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
1041 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
1042 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
1043 * { likely }
1044 * else { goto RaiseAvxRelated; }
1045 */
1046#ifdef RT_ARCH_AMD64
1047 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
1048 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
1049 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
1050 ^ 0x1a) ) { likely }
1051 else { goto RaiseAvxRelated; } */
1052 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
1053 //pCodeBuf[off++] = 0xcc;
1054 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
1055 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
1056 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
1057 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1058 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
1059 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
1060 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1061 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
1062 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
1063 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
1064 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseAvxRelated, kIemNativeInstrCond_ne);
1065
1066#elif defined(RT_ARCH_ARM64)
1067 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
1068 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
1069 else { goto RaiseAvxRelated; } */
1070 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
1071 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
1072 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
1073 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
1074 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
1075 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
1076 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
1077 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
1078 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
1079 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
1080 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
1081 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
1082 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
1083 idxLabelRaiseAvxRelated);
1084
1085#else
1086# error "Port me!"
1087#endif
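/* Worked example for the AMD64 variant above (values picked for illustration):
   with XCR0.SSE=1, XCR0.YMM=1, CR4.OSXSAVE=1 and CR0.TS=0 the composed value is
       ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | (1 << 1) | 0 = 0x18 | 0x02 = 0x1a,
   so the final XOR with 0x1a yields zero and execution stays on the likely path;
   any other combination leaves bits set and jumps to the RaiseAvxRelated label. */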
1088
1089 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1090 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1091 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1092 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
1093#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1094 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
1095 }
1096 else
1097 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
1098#endif
1099
1100 return off;
1101}
1102
1103
1104#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1105#define IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
1106 off = iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(pReNative, off)
1107
1108/** Emits code for IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT. */
1109DECL_INLINE_THROW(uint32_t)
1110iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1111{
1112 uint8_t const idxLabelRaiseSseAvxFpRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseAvxFpRelated);
1113 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
1114 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
1115
1116 /* mov tmp, varmxcsr */
1117 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
1118 /* tmp &= X86_MXCSR_XCPT_MASK */
1119 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
1120 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
1121 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
1122 /* tmp = ~tmp */
1123 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
1124 /* tmp &= mxcsr */
1125 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
1126 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
1127 idxLabelRaiseSseAvxFpRelated);
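/* Worked example (MXCSR value picked for illustration): with the default MXCSR of
   0x1F80 all exception mask bits (X86_MXCSR_XCPT_MASK) are set and no exception
   flags (X86_MXCSR_XCPT_FLAGS) are pending, so
       ~((0x1F80 & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT) & 0x1F80 & X86_MXCSR_XCPT_FLAGS
   is zero and the jump above is not taken at runtime; a pending exception flag
   whose mask bit is clear leaves a non-zero result and takes the
   RaiseSseAvxFpRelated path. */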
1128
1129 /* Free but don't flush the MXCSR register. */
1130 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
1131 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1132
1133 return off;
1134}
1135#endif
1136
1137
1138#define IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
1139 off = iemNativeEmitRaiseSseAvxSimdFpXcpt(pReNative, off, pCallEntry->idxInstr)
1140
1141/**
1142 * Emits code to raise a SIMD floating-point exception (either \#UD or \#XF).
1143 *
1144 * @returns New code buffer offset, UINT32_MAX on failure.
1145 * @param pReNative The native recompile state.
1146 * @param off The code buffer offset.
1147 * @param idxInstr The current instruction.
1148 */
1149DECL_INLINE_THROW(uint32_t)
1150iemNativeEmitRaiseSseAvxSimdFpXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1151{
1152 /*
1153 * Make sure we don't have any outstanding guest register writes as we may
1154 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
1155 */
1156 off = iemNativeRegFlushPendingWrites(pReNative, off);
1157
1158#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1159 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1160#else
1161 RT_NOREF(idxInstr);
1162#endif
1163
1164 /* Allocate a temporary CR4 register. */
1165 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
1166 uint8_t const idxLabelRaiseXf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseXf);
1167 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
1168
1169 /*
1170 * if (!(cr4 & X86_CR4_OSXMMEEXCPT))
1171 * return raisexcpt();
1172 */
1173 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXMMEEXCPT_BIT, idxLabelRaiseXf);
1174
1175 /* raise \#UD exception unconditionally. */
1176 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseUd);
1177
1178 /* Free but don't flush the CR4 register. */
1179 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1180
1181 return off;
1182}
1183
1184
1185#define IEM_MC_RAISE_DIVIDE_ERROR() \
1186 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
1187
1188/**
1189 * Emits code to raise a \#DE.
1190 *
1191 * @returns New code buffer offset, UINT32_MAX on failure.
1192 * @param pReNative The native recompile state.
1193 * @param off The code buffer offset.
1194 * @param idxInstr The current instruction.
1195 */
1196DECL_INLINE_THROW(uint32_t)
1197iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1198{
1199 /*
 1200 * Make sure we don't have any outstanding guest register writes as we may raise \#DE.
1201 */
1202 off = iemNativeRegFlushPendingWrites(pReNative, off);
1203
1204#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1205 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1206#else
1207 RT_NOREF(idxInstr);
1208#endif
1209
1210 uint8_t const idxLabelRaiseDe = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseDe);
1211
1212 /* raise \#DE exception unconditionally. */
1213 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseDe);
1214
1215 return off;
1216}
1217
1218
1219/*********************************************************************************************************************************
1220* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
1221*********************************************************************************************************************************/
1222
1223/**
1224 * Pushes an IEM_MC_IF_XXX onto the condition stack.
1225 *
1226 * @returns Pointer to the condition stack entry on success, NULL on failure
1227 * (too many nestings)
1228 */
1229DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
1230{
1231#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1232 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
1233#endif
1234
1235 uint32_t const idxStack = pReNative->cCondDepth;
1236 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
1237
1238 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
1239 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
1240
1241 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
1242 pEntry->fInElse = false;
1243 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
1244 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
1245
1246 return pEntry;
1247}
1248
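/* Note: the IEM_MC_IF_XXX emitters below all follow the same pattern: push a condition entry
   (iemNativeCondPushIf), emit a test that jumps to idxLabelElse when the condition is false,
   and snapshot the register/variable state via iemNativeCondStartIfBlock. IEM_MC_ELSE and
   IEM_MC_ENDIF then close the block through iemNativeEmitElse / iemNativeEmitEndIf. */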
1249
1250/**
1251 * Start of the if-block, snapshotting the register and variable state.
1252 */
1253DECL_INLINE_THROW(void)
1254iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
1255{
1256 Assert(offIfBlock != UINT32_MAX);
1257 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1258 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1259 Assert(!pEntry->fInElse);
1260
 1261 /* Define the start of the IF block if requested or for disassembly purposes. */
1262 if (idxLabelIf != UINT32_MAX)
1263 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
1264#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1265 else
1266 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
1267#else
1268 RT_NOREF(offIfBlock);
1269#endif
1270
1271#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1272 Assert(pReNative->Core.offPc == 0);
1273#endif
1274
1275 /* Copy the initial state so we can restore it in the 'else' block. */
1276 pEntry->InitialState = pReNative->Core;
1277}
1278
1279
1280#define IEM_MC_ELSE() } while (0); \
1281 off = iemNativeEmitElse(pReNative, off); \
1282 do {
1283
1284/** Emits code related to IEM_MC_ELSE. */
1285DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1286{
1287 /* Check sanity and get the conditional stack entry. */
1288 Assert(off != UINT32_MAX);
1289 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1290 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1291 Assert(!pEntry->fInElse);
1292
1293 /* Jump to the endif */
1294 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
1295
1296 /* Define the else label and enter the else part of the condition. */
1297 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1298 pEntry->fInElse = true;
1299
1300#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1301 Assert(pReNative->Core.offPc == 0);
1302#endif
1303
1304 /* Snapshot the core state so we can do a merge at the endif and restore
1305 the snapshot we took at the start of the if-block. */
1306 pEntry->IfFinalState = pReNative->Core;
1307 pReNative->Core = pEntry->InitialState;
1308
1309 return off;
1310}
1311
1312
1313#define IEM_MC_ENDIF() } while (0); \
1314 off = iemNativeEmitEndIf(pReNative, off)
1315
1316/** Emits code related to IEM_MC_ENDIF. */
1317DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1318{
1319 /* Check sanity and get the conditional stack entry. */
1320 Assert(off != UINT32_MAX);
1321 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1322 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1323
1324#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1325 Assert(pReNative->Core.offPc == 0);
1326#endif
1327
1328 /*
 1329 * Now we have to find common ground with the core state at the end of the
 1330 * if-block. Use the smallest common denominator and just drop anything
1331 * that isn't the same in both states.
1332 */
1333 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
1334 * which is why we're doing this at the end of the else-block.
1335 * But we'd need more info about future for that to be worth the effort. */
1336 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
1337 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
1338 {
1339 /* shadow guest stuff first. */
1340 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
1341 if (fGstRegs)
1342 {
1343 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
1344 do
1345 {
1346 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
1347 fGstRegs &= ~RT_BIT_64(idxGstReg);
1348
1349 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
1350 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
1351 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
1352 {
1353 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
1354 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
1355 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
1356 }
1357 } while (fGstRegs);
1358 }
1359 else
1360 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
1361
1362 /* Check variables next. For now we must require them to be identical
1363 or stuff we can recreate. */
1364 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
1365 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
1366 if (fVars)
1367 {
1368 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
1369 do
1370 {
1371 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
1372 fVars &= ~RT_BIT_32(idxVar);
1373
1374 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
1375 {
1376 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
1377 continue;
1378 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
1379 {
1380 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1381 if (idxHstReg != UINT8_MAX)
1382 {
1383 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1384 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1385 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
1386 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1387 }
1388 continue;
1389 }
1390 }
1391 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
1392 continue;
1393
1394 /* Irreconcilable, so drop it. */
1395 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1396 if (idxHstReg != UINT8_MAX)
1397 {
1398 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1399 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1400 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
1401 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1402 }
1403 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1404 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
1405 } while (fVars);
1406 }
1407
 1408 /* Finally, check that the host register allocations match. */
1409 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
1410 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
1411 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
1412 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
1413 }
1414
1415 /*
1416 * Define the endif label and maybe the else one if we're still in the 'if' part.
1417 */
1418 if (!pEntry->fInElse)
1419 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1420 else
1421 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
1422 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
1423
 1424 /* Pop the conditional stack. */
1425 pReNative->cCondDepth -= 1;
1426
1427 return off;
1428}
1429
1430
1431#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
1432 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
1433 do {
1434
1435/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
1436DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1437{
1438 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1439 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1440
1441 /* Get the eflags. */
1442 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1443 kIemNativeGstRegUse_ReadOnly);
1444
1445 /* Test and jump. */
1446 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1447
1448 /* Free but don't flush the EFlags register. */
1449 iemNativeRegFreeTmp(pReNative, idxEflReg);
1450
1451 /* Make a copy of the core state now as we start the if-block. */
1452 iemNativeCondStartIfBlock(pReNative, off);
1453
1454 return off;
1455}
1456
1457
1458#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
1459 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
1460 do {
1461
1462/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
1463DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1464{
1465 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1466 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1467
1468 /* Get the eflags. */
1469 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1470 kIemNativeGstRegUse_ReadOnly);
1471
1472 /* Test and jump. */
1473 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1474
1475 /* Free but don't flush the EFlags register. */
1476 iemNativeRegFreeTmp(pReNative, idxEflReg);
1477
1478 /* Make a copy of the core state now as we start the if-block. */
1479 iemNativeCondStartIfBlock(pReNative, off);
1480
1481 return off;
1482}
1483
1484
1485#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
1486 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
1487 do {
1488
1489/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
1490DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1491{
1492 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1493 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1494
1495 /* Get the eflags. */
1496 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1497 kIemNativeGstRegUse_ReadOnly);
1498
1499 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1500 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1501
1502 /* Test and jump. */
1503 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1504
1505 /* Free but don't flush the EFlags register. */
1506 iemNativeRegFreeTmp(pReNative, idxEflReg);
1507
1508 /* Make a copy of the core state now as we start the if-block. */
1509 iemNativeCondStartIfBlock(pReNative, off);
1510
1511 return off;
1512}
1513
1514
1515#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
1516 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
1517 do {
1518
1519/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
1520DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1521{
1522 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1523 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1524
1525 /* Get the eflags. */
1526 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1527 kIemNativeGstRegUse_ReadOnly);
1528
1529 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1530 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1531
1532 /* Test and jump. */
1533 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1534
1535 /* Free but don't flush the EFlags register. */
1536 iemNativeRegFreeTmp(pReNative, idxEflReg);
1537
1538 /* Make a copy of the core state now as we start the if-block. */
1539 iemNativeCondStartIfBlock(pReNative, off);
1540
1541 return off;
1542}
1543
1544
1545#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
1546 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
1547 do {
1548
1549#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
1550 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
1551 do {
1552
1553/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
1554DECL_INLINE_THROW(uint32_t)
1555iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1556 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1557{
1558 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
1559 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1560
1561 /* Get the eflags. */
1562 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1563 kIemNativeGstRegUse_ReadOnly);
1564
1565 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1566 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1567
1568 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1569 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1570 Assert(iBitNo1 != iBitNo2);
1571
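    /* Strategy on both hosts: isolate bit #1, shift it into bit #2's position and XOR the
       result with EFLAGS, so bit #2 of the temporary is set exactly when the two flags differ. */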
1572#ifdef RT_ARCH_AMD64
1573 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
1574
1575 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1576 if (iBitNo1 > iBitNo2)
1577 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1578 else
1579 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1580 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1581
1582#elif defined(RT_ARCH_ARM64)
1583 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1584 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1585
1586 /* and tmpreg, eflreg, #1<<iBitNo1 */
1587 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1588
 1589 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1590 if (iBitNo1 > iBitNo2)
1591 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1592 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1593 else
1594 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1595 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1596
1597 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1598
1599#else
1600# error "Port me"
1601#endif
1602
1603 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1604 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1605 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1606
1607 /* Free but don't flush the EFlags and tmp registers. */
1608 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1609 iemNativeRegFreeTmp(pReNative, idxEflReg);
1610
1611 /* Make a copy of the core state now as we start the if-block. */
1612 iemNativeCondStartIfBlock(pReNative, off);
1613
1614 return off;
1615}
1616
1617
1618#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
1619 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
1620 do {
1621
1622#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
1623 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
1624 do {
1625
1626/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
1627 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
1628DECL_INLINE_THROW(uint32_t)
1629iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
1630 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1631{
1632 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
1633 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1634
1635 /* We need an if-block label for the non-inverted variant. */
1636 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
1637 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
1638
1639 /* Get the eflags. */
1640 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1641 kIemNativeGstRegUse_ReadOnly);
1642
1643 /* Translate the flag masks to bit numbers. */
1644 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1645 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1646
1647 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1648 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1649 Assert(iBitNo1 != iBitNo);
1650
1651 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1652 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1653 Assert(iBitNo2 != iBitNo);
1654 Assert(iBitNo2 != iBitNo1);
1655
1656#ifdef RT_ARCH_AMD64
1657 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
1658#elif defined(RT_ARCH_ARM64)
1659 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1660#endif
1661
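    /* The lone bit decides the branch directly; the remaining two bits are then compared with
       the same isolate-shift-XOR trick used by iemNativeEmitIfEflagsTwoBitsEqual above. */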
1662 /* Check for the lone bit first. */
1663 if (!fInverted)
1664 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1665 else
1666 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
1667
1668 /* Then extract and compare the other two bits. */
1669#ifdef RT_ARCH_AMD64
1670 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1671 if (iBitNo1 > iBitNo2)
1672 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1673 else
1674 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1675 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1676
1677#elif defined(RT_ARCH_ARM64)
1678 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1679
1680 /* and tmpreg, eflreg, #1<<iBitNo1 */
1681 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1682
 1683 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1684 if (iBitNo1 > iBitNo2)
1685 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1686 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1687 else
1688 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1689 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1690
1691 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1692
1693#else
1694# error "Port me"
1695#endif
1696
1697 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1698 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1699 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1700
1701 /* Free but don't flush the EFlags and tmp registers. */
1702 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1703 iemNativeRegFreeTmp(pReNative, idxEflReg);
1704
1705 /* Make a copy of the core state now as we start the if-block. */
1706 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
1707
1708 return off;
1709}
1710
1711
1712#define IEM_MC_IF_CX_IS_NZ() \
1713 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
1714 do {
1715
1716/** Emits code for IEM_MC_IF_CX_IS_NZ. */
1717DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1718{
1719 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1720
1721 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1722 kIemNativeGstRegUse_ReadOnly);
1723 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
1724 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1725
1726 iemNativeCondStartIfBlock(pReNative, off);
1727 return off;
1728}
1729
1730
1731#define IEM_MC_IF_ECX_IS_NZ() \
1732 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
1733 do {
1734
1735#define IEM_MC_IF_RCX_IS_NZ() \
1736 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
1737 do {
1738
1739/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
1740DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1741{
1742 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1743
1744 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1745 kIemNativeGstRegUse_ReadOnly);
1746 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
1747 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1748
1749 iemNativeCondStartIfBlock(pReNative, off);
1750 return off;
1751}
1752
1753
1754#define IEM_MC_IF_CX_IS_NOT_ONE() \
1755 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
1756 do {
1757
1758/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
1759DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1760{
1761 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1762
1763 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1764 kIemNativeGstRegUse_ReadOnly);
1765#ifdef RT_ARCH_AMD64
1766 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1767#else
1768 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1769 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1770 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1771#endif
1772 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1773
1774 iemNativeCondStartIfBlock(pReNative, off);
1775 return off;
1776}
1777
1778
1779#define IEM_MC_IF_ECX_IS_NOT_ONE() \
1780 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
1781 do {
1782
1783#define IEM_MC_IF_RCX_IS_NOT_ONE() \
1784 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
1785 do {
1786
1787/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
1788DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1789{
1790 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1791
1792 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1793 kIemNativeGstRegUse_ReadOnly);
1794 if (f64Bit)
1795 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1796 else
1797 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1798 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1799
1800 iemNativeCondStartIfBlock(pReNative, off);
1801 return off;
1802}
1803
1804
1805#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1806 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
1807 do {
1808
1809#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1810 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
1811 do {
1812
1813/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
1814 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
1815DECL_INLINE_THROW(uint32_t)
1816iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
1817{
1818 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1819 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1820
1821 /* We have to load both RCX and EFLAGS before we can start branching,
1822 otherwise we'll end up in the else-block with an inconsistent
1823 register allocator state.
1824 Doing EFLAGS first as it's more likely to be loaded, right? */
1825 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1826 kIemNativeGstRegUse_ReadOnly);
1827 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1828 kIemNativeGstRegUse_ReadOnly);
1829
1830 /** @todo we could reduce this to a single branch instruction by spending a
1831 * temporary register and some setnz stuff. Not sure if loops are
1832 * worth it. */
1833 /* Check CX. */
1834#ifdef RT_ARCH_AMD64
1835 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1836#else
1837 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1838 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1839 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1840#endif
1841
1842 /* Check the EFlags bit. */
1843 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1844 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1845 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
1846 !fCheckIfSet /*fJmpIfSet*/);
1847
1848 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1849 iemNativeRegFreeTmp(pReNative, idxEflReg);
1850
1851 iemNativeCondStartIfBlock(pReNative, off);
1852 return off;
1853}
1854
1855
1856#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1857 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
1858 do {
1859
1860#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1861 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
1862 do {
1863
1864#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1865 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
1866 do {
1867
1868#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1869 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
1870 do {
1871
1872/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
1873 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
1874 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
1875 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
1876DECL_INLINE_THROW(uint32_t)
1877iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1878 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
1879{
1880 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1881 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1882
1883 /* We have to load both RCX and EFLAGS before we can start branching,
1884 otherwise we'll end up in the else-block with an inconsistent
1885 register allocator state.
1886 Doing EFLAGS first as it's more likely to be loaded, right? */
1887 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1888 kIemNativeGstRegUse_ReadOnly);
1889 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1890 kIemNativeGstRegUse_ReadOnly);
1891
1892 /** @todo we could reduce this to a single branch instruction by spending a
1893 * temporary register and some setnz stuff. Not sure if loops are
1894 * worth it. */
1895 /* Check RCX/ECX. */
1896 if (f64Bit)
1897 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1898 else
1899 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1900
1901 /* Check the EFlags bit. */
1902 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1903 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1904 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
1905 !fCheckIfSet /*fJmpIfSet*/);
1906
1907 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1908 iemNativeRegFreeTmp(pReNative, idxEflReg);
1909
1910 iemNativeCondStartIfBlock(pReNative, off);
1911 return off;
1912}
1913
1914
1915#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
1916 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
1917 do {
1918
1919/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
1920DECL_INLINE_THROW(uint32_t)
1921iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
1922{
1923 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1924
1925 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
1926 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
1927 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
1928 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
1929
1930 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
1931
1932 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
1933
1934 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
1935
1936 iemNativeCondStartIfBlock(pReNative, off);
1937 return off;
1938}
1939
1940
1941#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
1942 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
1943 do {
1944
1945/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
1946DECL_INLINE_THROW(uint32_t)
1947iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
1948{
1949 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1950 Assert(iGReg < 16);
1951
1952 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
1953 kIemNativeGstRegUse_ReadOnly);
1954
1955 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
1956
1957 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
1958
1959 iemNativeCondStartIfBlock(pReNative, off);
1960 return off;
1961}
1962
1963
1964#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1965
1966#define IEM_MC_IF_MXCSR_XCPT_PENDING() \
1967 off = iemNativeEmitIfMxcsrXcptPending(pReNative, off); \
1968 do {
1969
1970/** Emits code for IEM_MC_IF_MXCSR_XCPT_PENDING. */
1971DECL_INLINE_THROW(uint32_t)
1972iemNativeEmitIfMxcsrXcptPending(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1973{
1974 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1975
1976 uint8_t const idxGstMxcsrReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
1977 kIemNativeGstRegUse_Calculation);
1978 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
1979
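    /* Same unmasked-exception calculation as iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt above:
       tmp0 ends up holding the exception flags whose mask bits are clear; zero means nothing is
       pending and we jump to the else block. */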
1980 /* mov tmp0, mxcsr */
1981 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp, idxGstMxcsrReg);
1982 /* tmp0 &= X86_MXCSR_XCPT_FLAGS */
1983 off = iemNativeEmitAndGprByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS);
1984 /* mxcsr &= X86_MXCSR_XCPT_MASK */
1985 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstMxcsrReg, X86_MXCSR_XCPT_MASK);
 1986 /* mxcsr = ~mxcsr */
1987 off = iemNativeEmitInvBitsGpr(pReNative, off, idxGstMxcsrReg, idxGstMxcsrReg);
1988 /* mxcsr >>= X86_MXCSR_XCPT_MASK_SHIFT */
1989 off = iemNativeEmitShiftGprRight(pReNative, off, idxGstMxcsrReg, X86_MXCSR_XCPT_MASK_SHIFT);
1990 /* tmp0 &= mxcsr */
1991 off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegTmp, idxGstMxcsrReg);
1992
1993 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxRegTmp, true /*f64Bit*/, pEntry->idxLabelElse);
1994 iemNativeRegFreeTmp(pReNative, idxGstMxcsrReg);
1995 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1996
1997 iemNativeCondStartIfBlock(pReNative, off);
1998 return off;
1999}
2000
2001#endif
2002
2003
2004/*********************************************************************************************************************************
2005* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
2006*********************************************************************************************************************************/
2007
2008#define IEM_MC_NOREF(a_Name) \
2009 RT_NOREF_PV(a_Name)
2010
2011#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
2012 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
2013
2014#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
2015 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
2016
2017#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
2018 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
2019
2020#define IEM_MC_LOCAL(a_Type, a_Name) \
2021 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
2022
2023#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
2024 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
2025
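/* Illustrative use (not taken from this file): a typical MC block pairs these like
       IEM_MC_ARG(uint16_t, u16Value, 1);
       IEM_MC_LOCAL(uint64_t, uResult);
   i.e. each macro just reserves a variable index of the given size on pReNative; the actual
   host register or stack slot is assigned lazily by the acquire/register helpers further down. */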
2026
2027/**
2028 * Sets the host register for @a idxVarRc to @a idxReg.
2029 *
2030 * The register must not be allocated. Any guest register shadowing will be
2031 * implicitly dropped by this call.
2032 *
2033 * The variable must not have any register associated with it (causes
2034 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
2035 * implied.
2036 *
2037 * @returns idxReg
2038 * @param pReNative The recompiler state.
2039 * @param idxVar The variable.
2040 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
2041 * @param off For recording in debug info.
2042 *
2043 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
2044 */
2045DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
2046{
2047 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2048 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
2049 Assert(!pVar->fRegAcquired);
2050 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2051 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
2052 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
2053
2054 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
2055 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
2056
2057 iemNativeVarSetKindToStack(pReNative, idxVar);
2058 pVar->idxReg = idxReg;
2059
2060 return idxReg;
2061}
2062
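/* Typical use: capturing the return value of an assembly helper, see the
   iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off) call in
   iemNativeEmitCallAImplCommon below. */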
2063
2064/**
2065 * A convenient helper function.
2066 */
2067DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
2068 uint8_t idxReg, uint32_t *poff)
2069{
2070 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
2071 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
2072 return idxReg;
2073}
2074
2075
2076/**
2077 * This is called by IEM_MC_END() to clean up all variables.
2078 */
2079DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
2080{
2081 uint32_t const bmVars = pReNative->Core.bmVars;
2082 if (bmVars != 0)
2083 iemNativeVarFreeAllSlow(pReNative, bmVars);
2084 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
2085 Assert(pReNative->Core.bmStack == 0);
2086}
2087
2088
2089#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
2090
2091/**
2092 * This is called by IEM_MC_FREE_LOCAL.
2093 */
2094DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2095{
2096 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2097 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
2098 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2099}
2100
2101
2102#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
2103
2104/**
2105 * This is called by IEM_MC_FREE_ARG.
2106 */
2107DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2108{
2109 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2110 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
2111 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2112}
2113
2114
2115#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
2116
2117/**
2118 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
2119 */
2120DECL_INLINE_THROW(uint32_t)
2121iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
2122{
2123 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
2124 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
2125 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2126 Assert( pVarDst->cbVar == sizeof(uint16_t)
2127 || pVarDst->cbVar == sizeof(uint32_t));
2128
2129 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
2130 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
2131 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
2132 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
2133 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2134
2135 Assert(pVarDst->cbVar < pVarSrc->cbVar);
2136
2137 /*
2138 * Special case for immediates.
2139 */
2140 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
2141 {
2142 switch (pVarDst->cbVar)
2143 {
2144 case sizeof(uint16_t):
2145 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
2146 break;
2147 case sizeof(uint32_t):
2148 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
2149 break;
2150 default: AssertFailed(); break;
2151 }
2152 }
2153 else
2154 {
2155 /*
2156 * The generic solution for now.
2157 */
2158 /** @todo optimize this by having the python script make sure the source
2159 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
2160 * statement. Then we could just transfer the register assignments. */
2161 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
2162 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
2163 switch (pVarDst->cbVar)
2164 {
2165 case sizeof(uint16_t):
2166 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
2167 break;
2168 case sizeof(uint32_t):
2169 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
2170 break;
2171 default: AssertFailed(); break;
2172 }
2173 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
2174 iemNativeVarRegisterRelease(pReNative, idxVarDst);
2175 }
2176 return off;
2177}
2178
2179
2180
2181/*********************************************************************************************************************************
2182* Emitters for IEM_MC_CALL_CIMPL_XXX *
2183*********************************************************************************************************************************/
2184
2185/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
2186DECL_INLINE_THROW(uint32_t)
2187iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
2188 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
2189
2190{
2191 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
2192
2193#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2194 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
2195 when a call clobbers any of the relevant control registers. */
2196# if 1
2197 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
2198 {
2199 /* Likely as long as call+ret are done via cimpl. */
2200 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
2201 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
2202 }
2203 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
2204 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2205 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
2206 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2207 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
2208 else
2209 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2210 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2211 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2212
2213# else
2214 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
2215 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2216 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
2217 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2218 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
2219 || pfnCImpl == (uintptr_t)iemCImpl_callf
2220 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
2221 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
2222 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2223 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2224 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2225# endif
2226#endif
2227
2228 /*
2229 * Do all the call setup and cleanup.
2230 */
2231 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
2232
2233 /*
2234 * Load the two or three hidden arguments.
2235 */
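    /* The hidden arguments are pVCpu and cbInstr; on Windows/AMD64 strict-RC builds a pointer to
       a VBOXSTRICTRC shadow slot is additionally passed first, hence "two or three". */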
2236#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2237 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
2238 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2239 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
2240#else
2241 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2242 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
2243#endif
2244
2245 /*
2246 * Make the call and check the return code.
2247 *
2248 * Shadow PC copies are always flushed here, other stuff depends on flags.
2250 * Segment and general purpose registers are explicitly flushed via the
2250 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
2251 * macros.
2252 */
2253 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
2254#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2255 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
2256#endif
2257 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
2258 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
2259 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
2260 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
2261
2262 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
2263}
2264
2265
2266#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
2267 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
2268
2269/** Emits code for IEM_MC_CALL_CIMPL_1. */
2270DECL_INLINE_THROW(uint32_t)
2271iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2272 uintptr_t pfnCImpl, uint8_t idxArg0)
2273{
2274 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2275 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
2276}
2277
2278
2279#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
2280 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
2281
2282/** Emits code for IEM_MC_CALL_CIMPL_2. */
2283DECL_INLINE_THROW(uint32_t)
2284iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2285 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
2286{
2287 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2288 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2289 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
2290}
2291
2292
2293#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
2294 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2295 (uintptr_t)a_pfnCImpl, a0, a1, a2)
2296
2297/** Emits code for IEM_MC_CALL_CIMPL_3. */
2298DECL_INLINE_THROW(uint32_t)
2299iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2300 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2301{
2302 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2303 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2304 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2305 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
2306}
2307
2308
2309#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
2310 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2311 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
2312
2313/** Emits code for IEM_MC_CALL_CIMPL_4. */
2314DECL_INLINE_THROW(uint32_t)
2315iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2316 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2317{
2318 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2319 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2320 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2321 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2322 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
2323}
2324
2325
2326#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
2327 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2328 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
2329
2330/** Emits code for IEM_MC_CALL_CIMPL_5. */
2331DECL_INLINE_THROW(uint32_t)
2332iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2333 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
2334{
2335 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2336 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2337 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2338 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2339 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
2340 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
2341}
2342
2343
2344/** Recompiler debugging: Flush guest register shadow copies. */
2345#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
2346
2347
2348
2349/*********************************************************************************************************************************
2350* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
2351*********************************************************************************************************************************/
2352
2353/**
2354 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
2355 */
2356DECL_INLINE_THROW(uint32_t)
2357iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2358 uintptr_t pfnAImpl, uint8_t cArgs)
2359{
2360 if (idxVarRc != UINT8_MAX)
2361 {
2362 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
2363 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
2364 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
2365 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
2366 }
2367
2368 /*
2369 * Do all the call setup and cleanup.
2370 */
2371 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
2372
2373 /*
2374 * Make the call and update the return code variable if we've got one.
2375 */
2376 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
2377 if (idxVarRc != UINT8_MAX)
2378 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
2379
2380 return off;
2381}
2382
2383
2384
2385#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
2386 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
2387
2388#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
2389 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
2390
2391/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
2392DECL_INLINE_THROW(uint32_t)
2393iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
2394{
2395 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
2396}
2397
2398
2399#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
2400 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
2401
2402#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
2403 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
2404
2405/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
2406DECL_INLINE_THROW(uint32_t)
2407iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
2408{
2409 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2410 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
2411}
2412
2413
2414#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
2415 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
2416
2417#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
2418 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
2419
2420/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
2421DECL_INLINE_THROW(uint32_t)
2422iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2423 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
2424{
2425 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2426 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2427 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
2428}
2429
2430
2431#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
2432 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
2433
2434#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
2435 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
2436
2437/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
2438DECL_INLINE_THROW(uint32_t)
2439iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2440 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2441{
2442 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2443 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2444 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2445 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
2446}
2447
2448
2449#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
2450 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2451
2452#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
2453 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2454
2455/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
2456DECL_INLINE_THROW(uint32_t)
2457iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2458 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2459{
2460 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2461 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2462 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2463 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
2464 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
2465}
2466
2467
2468
2469/*********************************************************************************************************************************
2470* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
2471*********************************************************************************************************************************/
2472
2473#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
2474 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
2475
2476#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2477 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
2478
2479#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2480 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
2481
2482#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2483 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
2484
2485
2486/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
2487 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
2488DECL_INLINE_THROW(uint32_t)
2489iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
2490{
2491 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2492 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2493 Assert(iGRegEx < 20);
2494
2495 /* Same discussion as in iemNativeEmitFetchGregU16 */
2496 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2497 kIemNativeGstRegUse_ReadOnly);
2498
2499 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2500 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2501
2502 /* The value is zero-extended to the full 64-bit host register width. */
2503 if (iGRegEx < 16)
2504 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2505 else
2506 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2507
2508 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2509 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2510 return off;
2511}
2512
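/*
 * Illustrative sketch (documentation only; the helper name and the plain
 * uint64_t register file are hypothetical): what the emitted byte fetch above
 * computes.  Extended register indexes 0..15 select the low byte of GPR0..15,
 * while 16..19 select the high byte (AH/CH/DH/BH) of GPR0..3; the result is
 * zero extended to the full 64-bit host register backing the variable.
 */
#if 0 /* documentation only */
static uint64_t iemNativeExampleFetchGReg8(uint64_t const *pauGRegs /* [16] */, uint8_t iGRegEx)
{
    if (iGRegEx < 16)
        return (uint8_t)pauGRegs[iGRegEx];          /* bits 7:0  - AL, CL, ..., R15L */
    return (uint8_t)(pauGRegs[iGRegEx & 15] >> 8);  /* bits 15:8 - AH, CH, DH, BH */
}
#endif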
2513
2514#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2515 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
2516
2517#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2518 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
2519
2520#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2521 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
2522
2523/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
2524DECL_INLINE_THROW(uint32_t)
2525iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
2526{
2527 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2528 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2529 Assert(iGRegEx < 20);
2530
2531 /* Same discussion as in iemNativeEmitFetchGregU16 */
2532 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2533 kIemNativeGstRegUse_ReadOnly);
2534
2535 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2536 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2537
2538 if (iGRegEx < 16)
2539 {
2540 switch (cbSignExtended)
2541 {
2542 case sizeof(uint16_t):
2543 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2544 break;
2545 case sizeof(uint32_t):
2546 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2547 break;
2548 case sizeof(uint64_t):
2549 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2550 break;
2551 default: AssertFailed(); break;
2552 }
2553 }
2554 else
2555 {
2556 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2557 switch (cbSignExtended)
2558 {
2559 case sizeof(uint16_t):
2560 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2561 break;
2562 case sizeof(uint32_t):
2563 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2564 break;
2565 case sizeof(uint64_t):
2566 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2567 break;
2568 default: AssertFailed(); break;
2569 }
2570 }
2571
2572 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2573 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2574 return off;
2575}
2576
2577
2578
2579#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
2580 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
2581
2582#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
2583 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2584
2585#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
2586 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2587
2588/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
2589DECL_INLINE_THROW(uint32_t)
2590iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2591{
2592 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2593 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2594 Assert(iGReg < 16);
2595
2596 /*
2597 * We can either just load the low 16-bit of the GPR into a host register
2598 * for the variable, or we can do so via a shadow copy host register. The
2599 * latter will avoid having to reload it if it's being stored later, but
2600 * will waste a host register if it isn't touched again. Since we don't
2601 * know what's going to happen, we choose the latter for now.
2602 */
2603 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2604 kIemNativeGstRegUse_ReadOnly);
2605
2606 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2607 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2608 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2609 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2610
2611 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2612 return off;
2613}
2614
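/*
 * Illustrative sketch (documentation only; hypothetical helper): all the
 * IEM_MC_FETCH_GREG_U16* variants above boil down to the same operation - the
 * low 16 bits of the guest GPR zero extended into the 64-bit host register
 * backing the destination variable.  The cbZeroExtended parameter only sizes
 * the strict-build variable assertion.
 */
#if 0 /* documentation only */
static uint64_t iemNativeExampleFetchGReg16(uint64_t const *pauGRegs /* [16] */, uint8_t iGReg)
{
    return (uint16_t)pauGRegs[iGReg];   /* zero extends to 64 bits on return */
}
#endif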
2615
2616#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
2617 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2618
2619#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
2620 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2621
2622/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
2623DECL_INLINE_THROW(uint32_t)
2624iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
2625{
2626 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2627 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2628 Assert(iGReg < 16);
2629
2630 /*
2631 * We can either just load the low 16-bit of the GPR into a host register
2632 * for the variable, or we can do so via a shadow copy host register. The
2633 * latter will avoid having to reload it if it's being stored later, but
2634 * will waste a host register if it isn't touched again. Since we don't
2635 * know what's going to happen, we choose the latter for now.
2636 */
2637 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2638 kIemNativeGstRegUse_ReadOnly);
2639
2640 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2641 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2642 if (cbSignExtended == sizeof(uint32_t))
2643 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2644 else
2645 {
2646 Assert(cbSignExtended == sizeof(uint64_t));
2647 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2648 }
2649 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2650
2651 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2652 return off;
2653}
2654
2655
2656#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
2657 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
2658
2659#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
2660 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
2661
2662/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
2663DECL_INLINE_THROW(uint32_t)
2664iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2665{
2666 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2667 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2668 Assert(iGReg < 16);
2669
2670 /*
2671 * We can either just load the low 32-bit of the GPR into a host register
2672 * for the variable, or we can do so via a shadow copy host register. The
2673 * latter will avoid having to reload it if it's being stored later, but
2674 * will waste a host register if it isn't touched again. Since we don't
2675 * know what's going to happen, we choose the latter for now.
2676 */
2677 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2678 kIemNativeGstRegUse_ReadOnly);
2679
2680 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2681 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2682 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2683 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2684
2685 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2686 return off;
2687}
2688
2689
2690#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
2691 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
2692
2693/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
2694DECL_INLINE_THROW(uint32_t)
2695iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2696{
2697 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2698 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2699 Assert(iGReg < 16);
2700
2701 /*
2702 * We can either just load the low 32-bit of the GPR into a host register
2703 * for the variable, or we can do so via a shadow copy host register. The
2704 * latter will avoid having to reload it if it's being stored later, but
2705 * will waste a host register if it isn't touched again. Since we don't
2706 * know what's going to happen, we choose the latter for now.
2707 */
2708 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2709 kIemNativeGstRegUse_ReadOnly);
2710
2711 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2712 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2713 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2714 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2715
2716 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2717 return off;
2718}
2719
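/*
 * Worked example (documentation only; hypothetical helper): the zero and sign
 * extending 32-bit fetches only differ for source values with bit 31 set.
 */
#if 0 /* documentation only */
static void iemNativeExampleExtend32To64(void)
{
    uint32_t const uSrc = UINT32_C(0x80000000);
    uint64_t const uZx  = uSrc;                              /* 0x0000000080000000 - IEM_MC_FETCH_GREG_U32_ZX_U64 */
    uint64_t const uSx  = (uint64_t)(int64_t)(int32_t)uSrc;  /* 0xffffffff80000000 - IEM_MC_FETCH_GREG_U32_SX_U64 */
    (void)uZx; (void)uSx;
}
#endif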
2720
2721#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
2722 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2723
2724#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
2725 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2726
2727/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
2728 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
2729DECL_INLINE_THROW(uint32_t)
2730iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2731{
2732 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2733 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2734 Assert(iGReg < 16);
2735
2736 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2737 kIemNativeGstRegUse_ReadOnly);
2738
2739 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2740 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2741 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
2742 /** @todo name the register a shadow one already? */
2743 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2744
2745 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2746 return off;
2747}
2748
2749
2750
2751/*********************************************************************************************************************************
2752* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
2753*********************************************************************************************************************************/
2754
2755#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
2756 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
2757
2758/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
2759DECL_INLINE_THROW(uint32_t)
2760iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
2761{
2762 Assert(iGRegEx < 20);
2763 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2764 kIemNativeGstRegUse_ForUpdate);
2765#ifdef RT_ARCH_AMD64
2766 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2767
2768 /* To the lowest byte of the register: mov r8, imm8 */
2769 if (iGRegEx < 16)
2770 {
2771 if (idxGstTmpReg >= 8)
2772 pbCodeBuf[off++] = X86_OP_REX_B;
2773 else if (idxGstTmpReg >= 4)
2774 pbCodeBuf[off++] = X86_OP_REX;
2775 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2776 pbCodeBuf[off++] = u8Value;
2777 }
2778 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
2779 else if (idxGstTmpReg < 4)
2780 {
2781 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
2782 pbCodeBuf[off++] = u8Value;
2783 }
2784 else
2785 {
2786 /* ror reg64, 8 */
2787 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2788 pbCodeBuf[off++] = 0xc1;
2789 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
2790 pbCodeBuf[off++] = 8;
2791
2792 /* mov reg8, imm8 */
2793 if (idxGstTmpReg >= 8)
2794 pbCodeBuf[off++] = X86_OP_REX_B;
2795 else if (idxGstTmpReg >= 4)
2796 pbCodeBuf[off++] = X86_OP_REX;
2797 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2798 pbCodeBuf[off++] = u8Value;
2799
2800 /* rol reg64, 8 */
2801 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2802 pbCodeBuf[off++] = 0xc1;
2803 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
2804 pbCodeBuf[off++] = 8;
2805 }
2806
2807#elif defined(RT_ARCH_ARM64)
2808 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
2809 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2810 if (iGRegEx < 16)
2811 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
2812 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
2813 else
2814 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
2815 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
2816 iemNativeRegFreeTmp(pReNative, idxImmReg);
2817
2818#else
2819# error "Port me!"
2820#endif
2821
2822 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2823
2824 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
2825
2826 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2827 return off;
2828}
2829
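/*
 * Illustrative sketch (documentation only; hypothetical helper): storing a
 * constant to a high byte register (AH/CH/DH/BH, iGRegEx 16..19) merges the
 * immediate into bits 15:8 and leaves all other bits alone.  The AMD64
 * ror/mov/rol sequence above and the ARM64 bfi are two ways of emitting
 * exactly this merge.
 */
#if 0 /* documentation only */
static uint64_t iemNativeExampleStoreGReg8Hi(uint64_t uGstReg, uint8_t u8Value)
{
    return (uGstReg & ~UINT64_C(0xff00)) | ((uint64_t)u8Value << 8);
}
#endif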
2830
2831#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
2832 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
2833
2834/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
2835DECL_INLINE_THROW(uint32_t)
2836iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
2837{
2838 Assert(iGRegEx < 20);
2839 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
2840
2841 /*
2842 * If it's a constant value (unlikely) we treat this as an
2843 * IEM_MC_STORE_GREG_U8_CONST statement.
2844 */
2845 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
2846 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
2847 { /* likely */ }
2848 else
2849 {
2850 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
2851 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2852 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
2853 }
2854
2855 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2856 kIemNativeGstRegUse_ForUpdate);
2857 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
2858
2859#ifdef RT_ARCH_AMD64
2860 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
2861 if (iGRegEx < 16)
2862 {
2863 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
2864 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
2865 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
2866 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
2867 pbCodeBuf[off++] = X86_OP_REX;
2868 pbCodeBuf[off++] = 0x8a;
2869 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
2870 }
2871 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
2872 else if (idxGstTmpReg < 4 && idxVarReg < 4)
2873 {
2874 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
2875 pbCodeBuf[off++] = 0x8a;
2876 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
2877 }
2878 else
2879 {
2880 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
2881
2882 /* ror reg64, 8 */
2883 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2884 pbCodeBuf[off++] = 0xc1;
2885 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
2886 pbCodeBuf[off++] = 8;
2887
2888 /* mov reg8, reg8(r/m) */
2889 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
2890 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
2891 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
2892 pbCodeBuf[off++] = X86_OP_REX;
2893 pbCodeBuf[off++] = 0x8a;
2894 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
2895
2896 /* rol reg64, 8 */
2897 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2898 pbCodeBuf[off++] = 0xc1;
2899 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
2900 pbCodeBuf[off++] = 8;
2901 }
2902
2903#elif defined(RT_ARCH_ARM64)
2904 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
2905 or
2906 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
2907 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2908 if (iGRegEx < 16)
2909 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
2910 else
2911 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
2912
2913#else
2914# error "Port me!"
2915#endif
2916 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2917
2918 iemNativeVarRegisterRelease(pReNative, idxValueVar);
2919
2920 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
2921 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2922 return off;
2923}
2924
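/*
 * Illustrative sketch (documentation only; hypothetical helper): the general
 * form of the ARM64 BFI (bitfield insert) instruction used by this and several
 * other emitters - bfi dst, src, #lsb, #width copies the low 'width' bits of
 * src into dst starting at bit 'lsb' and preserves every other bit of dst.
 */
#if 0 /* documentation only */
static uint64_t iemNativeExampleBitfieldInsert(uint64_t uDst, uint64_t uSrc, unsigned iLsb, unsigned cBits)
{
    uint64_t const fMask = (cBits < 64 ? (UINT64_C(1) << cBits) - 1 : UINT64_MAX) << iLsb;
    return (uDst & ~fMask) | ((uSrc << iLsb) & fMask);
}
#endif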
2925
2926
2927#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
2928 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
2929
2930/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
2931DECL_INLINE_THROW(uint32_t)
2932iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
2933{
2934 Assert(iGReg < 16);
2935 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2936 kIemNativeGstRegUse_ForUpdate);
2937#ifdef RT_ARCH_AMD64
2938 /* mov reg16, imm16 */
2939 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
2940 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2941 if (idxGstTmpReg >= 8)
2942 pbCodeBuf[off++] = X86_OP_REX_B;
2943 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
2944 pbCodeBuf[off++] = RT_BYTE1(uValue);
2945 pbCodeBuf[off++] = RT_BYTE2(uValue);
2946
2947#elif defined(RT_ARCH_ARM64)
2948 /* movk xdst, #uValue, lsl #0 */
2949 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2950 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
2951
2952#else
2953# error "Port me!"
2954#endif
2955
2956 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2957
2958 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
2959 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2960 return off;
2961}
2962
2963
2964#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
2965 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
2966
2967/** Emits code for IEM_MC_STORE_GREG_U16. */
2968DECL_INLINE_THROW(uint32_t)
2969iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
2970{
2971 Assert(iGReg < 16);
2972 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
2973
2974 /*
2975 * If it's a constant value (unlikely) we treat this as an
2976 * IEM_MC_STORE_GREG_U16_CONST statement.
2977 */
2978 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
2979 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
2980 { /* likely */ }
2981 else
2982 {
2983 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
2984 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2985 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
2986 }
2987
2988 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2989 kIemNativeGstRegUse_ForUpdate);
2990
2991#ifdef RT_ARCH_AMD64
2992 /* mov reg16, reg16 or [mem16] */
2993 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2994 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2995 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
2996 {
2997 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
2998 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
2999 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
3000 pbCodeBuf[off++] = 0x8b;
3001 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
3002 }
3003 else
3004 {
3005 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
3006 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
3007 if (idxGstTmpReg >= 8)
3008 pbCodeBuf[off++] = X86_OP_REX_R;
3009 pbCodeBuf[off++] = 0x8b;
3010 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
3011 }
3012
3013#elif defined(RT_ARCH_ARM64)
3014 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
3015 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
3016 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3017 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
3018 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3019
3020#else
3021# error "Port me!"
3022#endif
3023
3024 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3025
3026 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3027 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3028 return off;
3029}
3030
3031
3032#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
3033 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
3034
3035/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
3036DECL_INLINE_THROW(uint32_t)
3037iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
3038{
3039 Assert(iGReg < 16);
3040 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3041 kIemNativeGstRegUse_ForFullWrite);
3042 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
3043 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3044 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3045 return off;
3046}
3047
3048
3049#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
3050 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
3051
3052/** Emits code for IEM_MC_STORE_GREG_U32. */
3053DECL_INLINE_THROW(uint32_t)
3054iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3055{
3056 Assert(iGReg < 16);
3057 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3058
3059 /*
3060 * If it's a constant value (unlikely) we treat this as an
3061 * IEM_MC_STORE_GREG_U32_CONST statement.
3062 */
3063 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3064 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3065 { /* likely */ }
3066 else
3067 {
3068 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3069 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3070 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
3071 }
3072
3073 /*
3074 * For the rest we allocate a guest register for the variable and write
3075 * it to the CPUMCTX structure.
3076 */
3077 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3078 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3079#ifdef VBOX_STRICT
3080 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
3081#endif
3082 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3083 return off;
3084}
3085
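/*
 * Illustrative sketch (documentation only; hypothetical helper):
 * IEM_MC_STORE_GREG_U32 follows the x86-64 rule that writing a 32-bit GPR
 * clears bits 63:32, which is why the emitter above stores the full 64-bit
 * host register (whose upper half is checked to be zero in strict builds)
 * rather than merging into the previous value.
 */
#if 0 /* documentation only */
static uint64_t iemNativeExampleStoreGReg32(uint64_t uOldGstReg, uint32_t u32Value)
{
    (void)uOldGstReg;   /* the old contents, including bits 63:32, are discarded */
    return u32Value;    /* zero extended to 64 bits */
}
#endif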
3086
3087#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
3088 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
3089
3090/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
3091DECL_INLINE_THROW(uint32_t)
3092iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
3093{
3094 Assert(iGReg < 16);
3095 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3096 kIemNativeGstRegUse_ForFullWrite);
3097 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
3098 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3099 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3100 return off;
3101}
3102
3103
3104#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
3105 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
3106
3107/** Emits code for IEM_MC_STORE_GREG_U64. */
3108DECL_INLINE_THROW(uint32_t)
3109iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3110{
3111 Assert(iGReg < 16);
3112 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3113
3114 /*
3115 * If it's a constant value (unlikely) we treat this as an
3116 * IEM_MC_STORE_GREG_U64_CONST statement.
3117 */
3118 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3119 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3120 { /* likely */ }
3121 else
3122 {
3123 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3124 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3125 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
3126 }
3127
3128 /*
3129 * For the rest we allocate a guest register for the variable and write
3130 * it to the CPUMCTX structure.
3131 */
3132 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3133 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3134 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3135 return off;
3136}
3137
3138
3139#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
3140 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
3141
3142/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
3143DECL_INLINE_THROW(uint32_t)
3144iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
3145{
3146 Assert(iGReg < 16);
3147 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3148 kIemNativeGstRegUse_ForUpdate);
3149 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
3150 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3151 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3152 return off;
3153}
3154
3155
3156/*********************************************************************************************************************************
3157* General purpose register manipulation (add, sub). *
3158*********************************************************************************************************************************/
3159
3160#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8AddendConst) \
3161 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8AddendConst)
3162
3163/** Emits code for IEM_MC_ADD_GREG_U16. */
3164DECL_INLINE_THROW(uint32_t)
3165iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
3166{
3167 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3168 kIemNativeGstRegUse_ForUpdate);
3169
3170#ifdef RT_ARCH_AMD64
3171 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3172 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3173 if (idxGstTmpReg >= 8)
3174 pbCodeBuf[off++] = X86_OP_REX_B;
3175 if (uAddend == 1)
3176 {
3177 pbCodeBuf[off++] = 0xff; /* inc */
3178 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3179 }
3180 else
3181 {
3182 pbCodeBuf[off++] = 0x81;
3183 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3184 pbCodeBuf[off++] = uAddend;
3185 pbCodeBuf[off++] = 0;
3186 }
3187
3188#else
3189 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3190 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3191
3192 /* add tmp, gstgrp, uAddend */
3193 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
3194
3195 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
3196 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3197
3198 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3199#endif
3200
3201 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3202
3203 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3204
3205 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3206 return off;
3207}
3208
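/*
 * Illustrative sketch (documentation only; hypothetical helper): 16-bit GPR
 * arithmetic only replaces bits 15:0, so a carry out of bit 15 is lost and
 * bits 63:16 keep their old value.  E.g. adding 1 to a register holding
 * 0x1234ffff yields 0x12340000, not 0x12350000.
 */
#if 0 /* documentation only */
static uint64_t iemNativeExampleAddGReg16(uint64_t uGstReg, uint8_t uAddend)
{
    uint16_t const uResult16 = (uint16_t)(uGstReg + uAddend);
    return (uGstReg & ~UINT64_C(0xffff)) | uResult16;
}
#endif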
3209
3210#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
3211 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3212
3213#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
3214 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3215
3216/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
3217DECL_INLINE_THROW(uint32_t)
3218iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
3219{
3220 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3221 kIemNativeGstRegUse_ForUpdate);
3222
3223#ifdef RT_ARCH_AMD64
3224 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3225 if (f64Bit)
3226 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3227 else if (idxGstTmpReg >= 8)
3228 pbCodeBuf[off++] = X86_OP_REX_B;
3229 if (uAddend == 1)
3230 {
3231 pbCodeBuf[off++] = 0xff; /* inc */
3232 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3233 }
3234 else if (uAddend < 128)
3235 {
3236 pbCodeBuf[off++] = 0x83; /* add */
3237 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3238 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3239 }
3240 else
3241 {
3242 pbCodeBuf[off++] = 0x81; /* add */
3243 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3244 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3245 pbCodeBuf[off++] = 0;
3246 pbCodeBuf[off++] = 0;
3247 pbCodeBuf[off++] = 0;
3248 }
3249
3250#else
3251 /* add gstgrp, gstgrp, uAddend */
3252 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3253 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
3254
3255#endif
3256
3257 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3258
3259 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3260
3261 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3262 return off;
3263}
3264
3265
3266
3267#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
3268 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
3269
3270/** Emits code for IEM_MC_SUB_GREG_U16. */
3271DECL_INLINE_THROW(uint32_t)
3272iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
3273{
3274 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3275 kIemNativeGstRegUse_ForUpdate);
3276
3277#ifdef RT_ARCH_AMD64
3278 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3279 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3280 if (idxGstTmpReg >= 8)
3281 pbCodeBuf[off++] = X86_OP_REX_B;
3282 if (uSubtrahend == 1)
3283 {
3284 pbCodeBuf[off++] = 0xff; /* dec */
3285 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3286 }
3287 else
3288 {
3289 pbCodeBuf[off++] = 0x81;
3290 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3291 pbCodeBuf[off++] = uSubtrahend;
3292 pbCodeBuf[off++] = 0;
3293 }
3294
3295#else
3296 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3297 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3298
3299 /* sub tmp, gstgrp, uSubtrahend */
3300 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
3301
3302 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
3303 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3304
3305 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3306#endif
3307
3308 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3309
3310 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3311
3312 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3313 return off;
3314}
3315
3316
3317#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
3318 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3319
3320#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
3321 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3322
3323/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
3324DECL_INLINE_THROW(uint32_t)
3325iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
3326{
3327 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3328 kIemNativeGstRegUse_ForUpdate);
3329
3330#ifdef RT_ARCH_AMD64
3331 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3332 if (f64Bit)
3333 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3334 else if (idxGstTmpReg >= 8)
3335 pbCodeBuf[off++] = X86_OP_REX_B;
3336 if (uSubtrahend == 1)
3337 {
3338 pbCodeBuf[off++] = 0xff; /* dec */
3339 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3340 }
3341 else if (uSubtrahend < 128)
3342 {
3343 pbCodeBuf[off++] = 0x83; /* sub */
3344 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3345 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3346 }
3347 else
3348 {
3349 pbCodeBuf[off++] = 0x81; /* sub */
3350 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3351 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3352 pbCodeBuf[off++] = 0;
3353 pbCodeBuf[off++] = 0;
3354 pbCodeBuf[off++] = 0;
3355 }
3356
3357#else
3358 /* sub tmp, gstgrp, uSubtrahend */
3359 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3360 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
3361
3362#endif
3363
3364 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3365
3366 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3367
3368 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3369 return off;
3370}
3371
3372
3373#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
3374 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
3375
3376#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
3377 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
3378
3379#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
3380 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
3381
3382#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
3383 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
3384
3385/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
3386DECL_INLINE_THROW(uint32_t)
3387iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
3388{
3389#ifdef VBOX_STRICT
3390 switch (cbMask)
3391 {
3392 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3393 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3394 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3395 case sizeof(uint64_t): break;
3396 default: AssertFailedBreak();
3397 }
3398#endif
3399
3400 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3401 kIemNativeGstRegUse_ForUpdate);
3402
3403 switch (cbMask)
3404 {
3405 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
3406 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
3407 break;
3408 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
3409 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
3410 break;
3411 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
3412 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
3413 break;
3414 case sizeof(uint64_t):
3415 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
3416 break;
3417 default: AssertFailedBreak();
3418 }
3419
3420 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3421
3422 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3423
3424 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3425 return off;
3426}
3427
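/*
 * Illustrative sketch (documentation only; hypothetical helper): how the
 * constant mask is widened for the different IEM_MC_AND_GREG_UXX sizes.  For
 * the 8 and 16-bit forms the upper mask bits are forced to all ones so the
 * AND leaves those register bits untouched; the 32-bit form clears bits 63:32
 * per the usual x86-64 rule; the 64-bit form uses the mask as given.
 */
#if 0 /* documentation only */
static uint64_t iemNativeExampleAndGReg(uint64_t uGstReg, uint64_t uMask, uint8_t cbMask)
{
    switch (cbMask)
    {
        case sizeof(uint8_t):   return uGstReg & (uMask | UINT64_C(0xffffffffffffff00));
        case sizeof(uint16_t):  return uGstReg & (uMask | UINT64_C(0xffffffffffff0000));
        case sizeof(uint32_t):  return (uint32_t)uGstReg & (uint32_t)uMask; /* also zeroes bits 63:32 */
        default:                return uGstReg & uMask;
    }
}
#endif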
3428
3429#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
3430 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
3431
3432#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
3433 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
3434
3435#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
3436 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
3437
3438#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
3439 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
3440
3441/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
3442DECL_INLINE_THROW(uint32_t)
3443iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
3444{
3445#ifdef VBOX_STRICT
3446 switch (cbMask)
3447 {
3448 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3449 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3450 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3451 case sizeof(uint64_t): break;
3452 default: AssertFailedBreak();
3453 }
3454#endif
3455
3456 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3457 kIemNativeGstRegUse_ForUpdate);
3458
3459 switch (cbMask)
3460 {
3461 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
3462 case sizeof(uint16_t):
3463 case sizeof(uint64_t):
3464 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
3465 break;
3466 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
3467 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
3468 break;
3469 default: AssertFailedBreak();
3470 }
3471
3472 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3473
3474 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3475
3476 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3477 return off;
3478}
3479
3480
3481/*********************************************************************************************************************************
3482* Local variable manipulation (add, sub, and, or). *
3483*********************************************************************************************************************************/
3484
3485#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
3486 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3487
3488#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
3489 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3490
3491#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
3492 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3493
3494#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
3495 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3496
3497/** Emits code for AND'ing a local and a constant value. */
3498DECL_INLINE_THROW(uint32_t)
3499iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3500{
3501#ifdef VBOX_STRICT
3502 switch (cbMask)
3503 {
3504 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3505 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3506 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3507 case sizeof(uint64_t): break;
3508 default: AssertFailedBreak();
3509 }
3510#endif
3511
3512 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3513 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3514
3515 if (cbMask <= sizeof(uint32_t))
3516 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
3517 else
3518 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
3519
3520 iemNativeVarRegisterRelease(pReNative, idxVar);
3521 return off;
3522}
3523
3524
3525#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
3526 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3527
3528#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
3529 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3530
3531#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
3532 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3533
3534#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
3535 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3536
3537/** Emits code for OR'ing a local and a constant value. */
3538DECL_INLINE_THROW(uint32_t)
3539iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3540{
3541#ifdef VBOX_STRICT
3542 switch (cbMask)
3543 {
3544 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3545 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3546 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3547 case sizeof(uint64_t): break;
3548 default: AssertFailedBreak();
3549 }
3550#endif
3551
3552 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3553 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3554
3555 if (cbMask <= sizeof(uint32_t))
3556 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
3557 else
3558 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
3559
3560 iemNativeVarRegisterRelease(pReNative, idxVar);
3561 return off;
3562}
3563
3564
3565#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
3566 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
3567
3568#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
3569 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
3570
3571#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
3572 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
3573
3574/** Emits code for reversing the byte order in a local value. */
3575DECL_INLINE_THROW(uint32_t)
3576iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
3577{
3578 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3579 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3580
3581 switch (cbLocal)
3582 {
3583 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
3584 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
3585 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
3586 default: AssertFailedBreak();
3587 }
3588
3589 iemNativeVarRegisterRelease(pReNative, idxVar);
3590 return off;
3591}
3592
3593
3594
3595/*********************************************************************************************************************************
3596* EFLAGS *
3597*********************************************************************************************************************************/
3598
3599#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
3600# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
3601#else
3602# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
3603 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
3604
3605DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
3606{
3607 if (fEflOutput)
3608 {
3609 PVMCPUCC const pVCpu = pReNative->pVCpu;
3610# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3611 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
3612 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
3613 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
3614# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
3615 if (fEflOutput & (a_fEfl)) \
3616 { \
3617 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
3618 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
3619 else \
3620 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
3621 } else do { } while (0)
3622# else
3623 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
3624 IEMLIVENESSBIT const LivenessClobbered =
3625 {
3626 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3627 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3628 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
3629 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
3630 };
3631 IEMLIVENESSBIT const LivenessDelayable =
3632 {
3633 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3634 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
3635 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3636 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
3637 };
3638# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
3639 if (fEflOutput & (a_fEfl)) \
3640 { \
3641 if (LivenessClobbered.a_fLivenessMember) \
3642 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
3643 else if (LivenessDelayable.a_fLivenessMember) \
3644 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
3645 else \
3646 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
3647 } else do { } while (0)
3648# endif
3649 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
3650 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
3651 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
3652 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
3653 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
3654 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
3655 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
3656# undef CHECK_FLAG_AND_UPDATE_STATS
3657 }
3658 RT_NOREF(fEflInput);
3659}
3660#endif /* !VBOX_WITH_STATISTICS || !IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3661
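/*
 * Illustrative sketch (documentation only; the structure and helper are
 * hypothetical): the classification done by the statistics helper above for
 * the extended liveness layout.  A flag that the following code writes without
 * reading (or otherwise needing) is skippable; if the only other interest is a
 * potential exception or call, the update is delayable; everything else is
 * required.
 */
#if 0 /* documentation only */
typedef struct EXAMPLELIVENESS
{
    uint64_t fRead, fWrite, fPotXcptOrCall, fOther;
} EXAMPLELIVENESS;

static void iemNativeExampleClassifyEflBit(EXAMPLELIVENESS const *pEntry, uint64_t fEflBit,
                                           unsigned *pcSkippable, unsigned *pcDelayable, unsigned *pcRequired)
{
    uint64_t const fClobbered = pEntry->fWrite & ~(pEntry->fRead | pEntry->fPotXcptOrCall | pEntry->fOther);
    uint64_t const fDelayable = pEntry->fWrite & pEntry->fPotXcptOrCall & ~(pEntry->fRead | pEntry->fOther);
    if (fClobbered & fEflBit)
        *pcSkippable += 1;
    else if (fDelayable & fEflBit)
        *pcDelayable += 1;
    else
        *pcRequired  += 1;
}
#endif
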
3662#undef IEM_MC_FETCH_EFLAGS /* should not be used */
3663#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
3664 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
3665
3666/** Handles IEM_MC_FETCH_EFLAGS_EX. */
3667DECL_INLINE_THROW(uint32_t)
3668iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
3669 uint32_t fEflInput, uint32_t fEflOutput)
3670{
3671 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
3672 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
3673 RT_NOREF(fEflInput, fEflOutput);
3674
3675#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3676# ifdef VBOX_STRICT
3677 if ( pReNative->idxCurCall != 0
3678 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
3679 {
3680 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
3681 uint32_t const fBoth = fEflInput | fEflOutput;
3682# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
3683 AssertMsg( !(fBoth & (a_fElfConst)) \
3684 || (!(fEflInput & (a_fElfConst)) \
3685 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
3686 : !(fEflOutput & (a_fElfConst)) \
3687 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
3688 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
3689 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
3690 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
3691 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
3692 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
3693 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
3694 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
3695 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
3696 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
3697# undef ASSERT_ONE_EFL
3698 }
3699# endif
3700#endif
3701
3702 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
3703
3704 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
3705 * the existing shadow copy. */
3706 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
3707 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
3708 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
3709 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
3710 return off;
3711}
3712
3713
3714
3715/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
3716 * start using it with custom native code emission (inlining assembly
3717 * instruction helpers). */
3718#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
3719#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
3720 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
3721 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
3722
3723#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
3724#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
3725 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
3726 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
3727
3728/** Handles IEM_MC_COMMIT_EFLAGS_EX and IEM_MC_COMMIT_EFLAGS_OPT_EX. */
3729DECL_INLINE_THROW(uint32_t)
3730iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
3731 bool fUpdateSkipping)
3732{
3733 RT_NOREF(fEflOutput);
3734 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
3735 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
3736
3737#ifdef VBOX_STRICT
3738 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
3739 uint32_t offFixup = off;
3740 off = iemNativeEmitJnzToFixed(pReNative, off, off);
3741 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
3742 iemNativeFixupFixedJump(pReNative, offFixup, off);
3743
3744 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
3745 offFixup = off;
3746 off = iemNativeEmitJzToFixed(pReNative, off, off);
3747 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
3748 iemNativeFixupFixedJump(pReNative, offFixup, off);
3749
3750 /** @todo validate that only bits in the fEflOutput mask changed. */
3751#endif
3752
3753#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
3754 if (fUpdateSkipping)
3755 {
3756 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
3757 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
3758 else
3759 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
3760 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
3761 }
3762#else
3763 RT_NOREF_PV(fUpdateSkipping);
3764#endif
3765
3766 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
3767 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
3768 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
3769 return off;
3770}
3771
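/*
 * Illustrative sketch (documentation only; hypothetical helper): the condition
 * the strict-build checks in the commit emitter above enforce before writing
 * the value back to the guest EFLAGS - at least the always-one bit(s) must be
 * set and none of the reserved read-as-zero bits may be set, otherwise the
 * generated code runs into a breakpoint instruction.
 */
#if 0 /* documentation only */
static bool iemNativeExampleIsCommittableEFlags(uint32_t fEfl,
                                                uint32_t fRa1Mask   /* e.g. X86_EFL_RA1_MASK */,
                                                uint32_t fRazHwMask /* e.g. X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32 */)
{
    return (fEfl & fRa1Mask)   != 0
        && (fEfl & fRazHwMask) == 0;
}
#endif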
3772
3773
3774/*********************************************************************************************************************************
3775* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
3776*********************************************************************************************************************************/
3777
3778#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
3779 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
3780
3781#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
3782 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
3783
3784#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
3785 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
3786
3787
3788/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
3789 * IEM_MC_FETCH_SREG_ZX_U64. */
3790DECL_INLINE_THROW(uint32_t)
3791iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
3792{
3793 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3794 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
3795 Assert(iSReg < X86_SREG_COUNT);
3796
3797 /*
3798 * For now, we will not create a shadow copy of a selector. The rationale
3799 * is that since we do not recompile the popping and loading of segment
3800 * registers and that the IEM_MC_FETCH_SREG_U* MCs are only used for
3801 * pushing and moving to registers, there is only a small chance that the
3802 * shadow copy will be accessed again before the register is reloaded. One
3803 * scenario would be nested calls in 16-bit code, but I doubt it's worth
3804 * the extra register pressure atm.
3805 *
3806 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
3807 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
3808 * store scenario covered at present (r160730).
3809 */
3810 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3811 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3812 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
3813 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3814 return off;
3815}
3816
3817
3818
3819/*********************************************************************************************************************************
3820* Register references. *
3821*********************************************************************************************************************************/
3822
3823#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
3824 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
3825
3826#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
3827 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
3828
3829/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
3830DECL_INLINE_THROW(uint32_t)
3831iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
3832{
3833 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
3834 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
3835 Assert(iGRegEx < 20);
3836
3837 if (iGRegEx < 16)
3838 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
3839 else
3840 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
3841
3842 /* If we've delayed writing back the register value, flush it now. */
3843 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
3844
3845 /* If it's not a const reference we need to flush the shadow copy of the register now. */
3846 if (!fConst)
3847 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
3848
3849 return off;
3850}
3851
3852#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
3853 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
3854
3855#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
3856 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
3857
3858#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
3859 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
3860
3861#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
3862 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
3863
3864#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
3865 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
3866
3867#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
3868 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
3869
3870#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
3871 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
3872
3873#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
3874 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
3875
3876#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
3877 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
3878
3879#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
3880 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
3881
3882/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
3883DECL_INLINE_THROW(uint32_t)
3884iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
3885{
3886 Assert(iGReg < 16);
3887 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
3888 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
3889
3890 /* If we've delayed writing back the register value, flush it now. */
3891 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
3892
3893 /* If it's not a const reference we need to flush the shadow copy of the register now. */
3894 if (!fConst)
3895 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
3896
3897 return off;
3898}
3899
3900
3901#undef IEM_MC_REF_EFLAGS /* should not be used. */
3902#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
3903 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
3904 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
3905
3906/** Handles IEM_MC_REF_EFLAGS. */
3907DECL_INLINE_THROW(uint32_t)
3908iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
3909{
3910 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
3911 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
3912
3913#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
3914 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
3915
3916 /* Updating the skipping according to the outputs is a little early, but
3917 we don't have any other hooks for references atm. */
3918 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
3919 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
3920 else if (fEflOutput & X86_EFL_STATUS_BITS)
3921 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
3922 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
3923#else
3924 RT_NOREF(fEflInput, fEflOutput);
3925#endif
3926
3927 /* If we've delayed writing back the register value, flush it now. */
3928 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
3929
3930 /* If there is a shadow copy of guest EFLAGS, flush it now. */
3931 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
3932
3933 return off;
3934}
3935
3936
3937/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
3938 * different code from the threaded recompiler, maybe it would be helpful. For now
3939 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
3940#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
3941
3942
3943#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
3944 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
3945
3946#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
3947 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
3948
3949#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
3950 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
3951
3952#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3953/* Just being paranoid here. */
3954# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
3955AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
3956AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
3957AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
3958AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
3959# endif
3960AssertCompileMemberOffset(X86XMMREG, au64, 0);
3961AssertCompileMemberOffset(X86XMMREG, au32, 0);
3962AssertCompileMemberOffset(X86XMMREG, ar64, 0);
3963AssertCompileMemberOffset(X86XMMREG, ar32, 0);
3964
3965# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
3966 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
3967# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
3968 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
3969# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
3970 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
3971# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
3972 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
3973#endif
3974
3975/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
3976DECL_INLINE_THROW(uint32_t)
3977iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
3978{
3979 Assert(iXReg < 16);
3980 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
3981 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
3982
3983 /* If we've delayed writing back the register value, flush it now. */
3984 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
3985
3986#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3987 /* If it's not a const reference we need to flush the shadow copy of the register now. */
3988 if (!fConst)
3989 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
3990#else
3991 RT_NOREF(fConst);
3992#endif
3993
3994 return off;
3995}
3996
3997
3998#define IEM_MC_REF_MXCSR(a_pfMxcsr) \
3999 off = iemNativeEmitRefMxcsr(pReNative, off, a_pfMxcsr)
4000
4001/** Handles IEM_MC_REF_MXCSR. */
4002DECL_INLINE_THROW(uint32_t)
4003iemNativeEmitRefMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
4004{
4005 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_MxCsr, 0);
4006 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4007
4008 /* If we've delayed writing back the register value, flush it now. */
4009 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_MxCsr, 0);
4010
4011 /* If there is a shadow copy of guest MXCSR, flush it now. */
4012 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_MxCsr));
4013
4014 return off;
4015}
4016
4017
4018
4019/*********************************************************************************************************************************
4020* Effective Address Calculation *
4021*********************************************************************************************************************************/
4022#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
4023 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
4024
4025/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
4026 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
4027DECL_INLINE_THROW(uint32_t)
4028iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4029 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
4030{
4031 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4032
4033 /*
4034 * Handle the disp16 form with no registers first.
4035 *
4036 * Convert to an immediate value, as that'll delay the register allocation
4037 * and assignment till the memory access / call / whatever and we can use
4038 * a more appropriate register (or none at all).
4039 */
4040 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
4041 {
4042 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
4043 return off;
4044 }
4045
4046 /* Determine the displacement. */
4047 uint16_t u16EffAddr;
4048 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4049 {
4050 case 0: u16EffAddr = 0; break;
4051 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
4052 case 2: u16EffAddr = u16Disp; break;
4053 default: AssertFailedStmt(u16EffAddr = 0);
4054 }
4055
4056 /* Determine the registers involved. */
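/* The classic 16-bit addressing forms by ModRM.rm: 0=[BX+SI], 1=[BX+DI], 2=[BP+SI],
   3=[BP+DI], 4=[SI], 5=[DI], 6=[BP] (pure disp16 when mod=0, handled above), 7=[BX]. */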
4057 uint8_t idxGstRegBase;
4058 uint8_t idxGstRegIndex;
4059 switch (bRm & X86_MODRM_RM_MASK)
4060 {
4061 case 0:
4062 idxGstRegBase = X86_GREG_xBX;
4063 idxGstRegIndex = X86_GREG_xSI;
4064 break;
4065 case 1:
4066 idxGstRegBase = X86_GREG_xBX;
4067 idxGstRegIndex = X86_GREG_xDI;
4068 break;
4069 case 2:
4070 idxGstRegBase = X86_GREG_xBP;
4071 idxGstRegIndex = X86_GREG_xSI;
4072 break;
4073 case 3:
4074 idxGstRegBase = X86_GREG_xBP;
4075 idxGstRegIndex = X86_GREG_xDI;
4076 break;
4077 case 4:
4078 idxGstRegBase = X86_GREG_xSI;
4079 idxGstRegIndex = UINT8_MAX;
4080 break;
4081 case 5:
4082 idxGstRegBase = X86_GREG_xDI;
4083 idxGstRegIndex = UINT8_MAX;
4084 break;
4085 case 6:
4086 idxGstRegBase = X86_GREG_xBP;
4087 idxGstRegIndex = UINT8_MAX;
4088 break;
4089#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
4090 default:
4091#endif
4092 case 7:
4093 idxGstRegBase = X86_GREG_xBX;
4094 idxGstRegIndex = UINT8_MAX;
4095 break;
4096 }
4097
4098 /*
4099 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
4100 */
4101 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4102 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4103 kIemNativeGstRegUse_ReadOnly);
4104 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
4105 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4106 kIemNativeGstRegUse_ReadOnly)
4107 : UINT8_MAX;
4108#ifdef RT_ARCH_AMD64
4109 if (idxRegIndex == UINT8_MAX)
4110 {
4111 if (u16EffAddr == 0)
4112 {
4113 /* movzx ret, base */
4114 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
4115 }
4116 else
4117 {
4118 /* lea ret32, [base64 + disp32] */
4119 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
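/* ModRM.rm encoding 4 selects a SIB byte rather than a base register, so rSP cannot
   be used directly here and r12 (low three bits also 4) needs the explicit SIB form
   emitted in the else branch below. */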
4120 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4121 if (idxRegRet >= 8 || idxRegBase >= 8)
4122 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
4123 pbCodeBuf[off++] = 0x8d;
4124 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4125 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
4126 else
4127 {
4128 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
4129 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4130 }
4131 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
4132 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
4133 pbCodeBuf[off++] = 0;
4134 pbCodeBuf[off++] = 0;
4135 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4136
4137 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
4138 }
4139 }
4140 else
4141 {
4142 /* lea ret32, [index64 + base64 (+ disp32)] */
4143 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4144 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4145 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4146 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4147 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4148 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4149 pbCodeBuf[off++] = 0x8d;
4150 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
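/* A base register with low bits 101b (xBP/r13) cannot be encoded with mod=0 in the SIB
   form (that combination means disp32 and no base), so force a displacement for it. */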
4151 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4152 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
4153 if (bMod == X86_MOD_MEM4)
4154 {
4155 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
4156 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
4157 pbCodeBuf[off++] = 0;
4158 pbCodeBuf[off++] = 0;
4159 }
4160 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4161 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
4162 }
4163
4164#elif defined(RT_ARCH_ARM64)
4165 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4166 if (u16EffAddr == 0)
4167 {
4168 if (idxRegIndex == UINT8_MAX)
4169 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
4170 else
4171 {
4172 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
4173 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
4174 }
4175 }
4176 else
4177 {
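/* The A64 ADD/SUB (immediate) form used here takes a 12-bit unsigned immediate, hence
   the +/-4095 window checked below; anything larger is loaded with MOVZ and added as a
   register operand instead. */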
4178 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
4179 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
4180 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
4181 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
4182 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
4183 else
4184 {
4185 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
4186 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
4187 }
4188 if (idxRegIndex != UINT8_MAX)
4189 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
4190 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
4191 }
4192
4193#else
4194# error "port me"
4195#endif
4196
4197 if (idxRegIndex != UINT8_MAX)
4198 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4199 iemNativeRegFreeTmp(pReNative, idxRegBase);
4200 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4201 return off;
4202}
4203
4204
4205#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
4206 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
4207
4208/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
4209 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
4210DECL_INLINE_THROW(uint32_t)
4211iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4212 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
4213{
4214 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4215
4216 /*
4217 * Handle the disp32 form with no registers first.
4218 *
4219 * Convert to an immediate value, as that'll delay the register allocation
4220 * and assignment till the memory access / call / whatever and we can use
4221 * a more appropriate register (or none at all).
4222 */
4223 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4224 {
4225 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
4226 return off;
4227 }
4228
4229 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
4230 uint32_t u32EffAddr = 0;
4231 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4232 {
4233 case 0: break;
4234 case 1: u32EffAddr = (int8_t)u32Disp; break;
4235 case 2: u32EffAddr = u32Disp; break;
4236 default: AssertFailed();
4237 }
4238
4239 /* Get the register (or SIB) value. */
4240 uint8_t idxGstRegBase = UINT8_MAX;
4241 uint8_t idxGstRegIndex = UINT8_MAX;
4242 uint8_t cShiftIndex = 0;
4243 switch (bRm & X86_MODRM_RM_MASK)
4244 {
4245 case 0: idxGstRegBase = X86_GREG_xAX; break;
4246 case 1: idxGstRegBase = X86_GREG_xCX; break;
4247 case 2: idxGstRegBase = X86_GREG_xDX; break;
4248 case 3: idxGstRegBase = X86_GREG_xBX; break;
4249 case 4: /* SIB */
4250 {
4251 /* index w/ scaling. */
4252 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4253 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4254 {
4255 case 0: idxGstRegIndex = X86_GREG_xAX; break;
4256 case 1: idxGstRegIndex = X86_GREG_xCX; break;
4257 case 2: idxGstRegIndex = X86_GREG_xDX; break;
4258 case 3: idxGstRegIndex = X86_GREG_xBX; break;
4259 case 4: cShiftIndex = 0; /*no index*/ break;
4260 case 5: idxGstRegIndex = X86_GREG_xBP; break;
4261 case 6: idxGstRegIndex = X86_GREG_xSI; break;
4262 case 7: idxGstRegIndex = X86_GREG_xDI; break;
4263 }
4264
4265 /* base */
4266 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
4267 {
4268 case 0: idxGstRegBase = X86_GREG_xAX; break;
4269 case 1: idxGstRegBase = X86_GREG_xCX; break;
4270 case 2: idxGstRegBase = X86_GREG_xDX; break;
4271 case 3: idxGstRegBase = X86_GREG_xBX; break;
4272 case 4:
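/* ESP as the SIB base: the decoder passes the fixed stack-pointer adjustment for the
   pop [xSP] case in bits 8 and up of uSibAndRspOffset (see the parameter docs on the
   64-bit variant below), so the address ends up using the incremented xSP value. */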
4273 idxGstRegBase = X86_GREG_xSP;
4274 u32EffAddr += uSibAndRspOffset >> 8;
4275 break;
4276 case 5:
4277 if ((bRm & X86_MODRM_MOD_MASK) != 0)
4278 idxGstRegBase = X86_GREG_xBP;
4279 else
4280 {
4281 Assert(u32EffAddr == 0);
4282 u32EffAddr = u32Disp;
4283 }
4284 break;
4285 case 6: idxGstRegBase = X86_GREG_xSI; break;
4286 case 7: idxGstRegBase = X86_GREG_xDI; break;
4287 }
4288 break;
4289 }
4290 case 5: idxGstRegBase = X86_GREG_xBP; break;
4291 case 6: idxGstRegBase = X86_GREG_xSI; break;
4292 case 7: idxGstRegBase = X86_GREG_xDI; break;
4293 }
4294
4295 /*
4296 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
4297 * the start of the function.
4298 */
4299 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4300 {
4301 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
4302 return off;
4303 }
4304
4305 /*
4306 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4307 */
4308 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4309 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
4310 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4311 kIemNativeGstRegUse_ReadOnly);
4312 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
4313 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4314 kIemNativeGstRegUse_ReadOnly);
4315
4316 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
4317 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
4318 {
4319 idxRegBase = idxRegIndex;
4320 idxRegIndex = UINT8_MAX;
4321 }
4322
4323#ifdef RT_ARCH_AMD64
4324 if (idxRegIndex == UINT8_MAX)
4325 {
4326 if (u32EffAddr == 0)
4327 {
4328 /* mov ret, base */
4329 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4330 }
4331 else
4332 {
4333 /* lea ret32, [base64 + disp32] */
4334 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4335 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4336 if (idxRegRet >= 8 || idxRegBase >= 8)
4337 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
4338 pbCodeBuf[off++] = 0x8d;
4339 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4340 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4341 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
4342 else
4343 {
4344 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4345 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4346 }
4347 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4348 if (bMod == X86_MOD_MEM4)
4349 {
4350 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4351 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4352 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4353 }
4354 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4355 }
4356 }
4357 else
4358 {
4359 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4360 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4361 if (idxRegBase == UINT8_MAX)
4362 {
4363 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
4364 if (idxRegRet >= 8 || idxRegIndex >= 8)
4365 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4366 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4367 pbCodeBuf[off++] = 0x8d;
4368 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
4369 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
4370 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4371 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4372 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4373 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4374 }
4375 else
4376 {
4377 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
4378 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4379 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4380 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4381 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4382 pbCodeBuf[off++] = 0x8d;
4383 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
4384 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4385 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4386 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
4387 if (bMod != X86_MOD_MEM0)
4388 {
4389 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4390 if (bMod == X86_MOD_MEM4)
4391 {
4392 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4393 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4394 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4395 }
4396 }
4397 }
4398 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4399 }
4400
4401#elif defined(RT_ARCH_ARM64)
4402 if (u32EffAddr == 0)
4403 {
4404 if (idxRegIndex == UINT8_MAX)
4405 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4406 else if (idxRegBase == UINT8_MAX)
4407 {
4408 if (cShiftIndex == 0)
4409 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
4410 else
4411 {
4412 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4413 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
4414 }
4415 }
4416 else
4417 {
4418 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4419 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
4420 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4421 }
4422 }
4423 else
4424 {
4425 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
4426 {
4427 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4428 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
4429 }
4430 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
4431 {
4432 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4433 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
4434 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
4435 }
4436 else
4437 {
4438 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
4439 if (idxRegBase != UINT8_MAX)
4440 {
4441 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4442 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
4443 }
4444 }
4445 if (idxRegIndex != UINT8_MAX)
4446 {
4447 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4448 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
4449 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4450 }
4451 }
4452
4453#else
4454# error "port me"
4455#endif
4456
4457 if (idxRegIndex != UINT8_MAX)
4458 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4459 if (idxRegBase != UINT8_MAX)
4460 iemNativeRegFreeTmp(pReNative, idxRegBase);
4461 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4462 return off;
4463}
4464
4465
4466#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4467 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4468 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4469
4470#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4471 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4472 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4473
4474#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4475 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4476 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
4477
4478/**
4479 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
4480 *
4481 * @returns New off.
4482 * @param pReNative The native recompile state.
4483 * @param off The current code buffer offset.
4484 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
4485 * bit 4 to REX.X. The two bits are part of the
4486 * REG sub-field, which isn't needed in this
4487 * function.
4488 * @param uSibAndRspOffset Two parts:
4489 * - The first 8 bits make up the SIB byte.
4490 * - The next 8 bits are the fixed RSP/ESP offset
4491 * in case of a pop [xSP].
4492 * @param u32Disp The displacement byte/word/dword, if any.
4493 * @param cbInstr The size of the fully decoded instruction. Used
4494 * for RIP relative addressing.
4495 * @param idxVarRet The result variable number.
4496 * @param f64Bit Whether to use a 64-bit or 32-bit address size
4497 * when calculating the address.
4498 *
4499 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
4500 */
4501DECL_INLINE_THROW(uint32_t)
4502iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
4503 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
4504{
4505 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4506
4507 /*
4508 * Special case the rip + disp32 form first.
4509 */
4510 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4511 {
4512#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
4513 /* Need to take the current PC offset into account for the displacement, no need to flush here
4514 * as the PC is only accessed read-only and no branching or helper calls are involved. */
4515 u32Disp += pReNative->Core.offPc;
4516#endif
4517
4518 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4519 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
4520 kIemNativeGstRegUse_ReadOnly);
4521#ifdef RT_ARCH_AMD64
4522 if (f64Bit)
4523 {
4524 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
4525 if ((int32_t)offFinalDisp == offFinalDisp)
4526 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
4527 else
4528 {
4529 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
4530 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
4531 }
4532 }
4533 else
4534 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
4535
4536#elif defined(RT_ARCH_ARM64)
4537 if (f64Bit)
4538 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4539 (int64_t)(int32_t)u32Disp + cbInstr);
4540 else
4541 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4542 (int32_t)u32Disp + cbInstr);
4543
4544#else
4545# error "Port me!"
4546#endif
4547 iemNativeRegFreeTmp(pReNative, idxRegPc);
4548 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4549 return off;
4550 }
4551
4552 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
4553 int64_t i64EffAddr = 0;
4554 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4555 {
4556 case 0: break;
4557 case 1: i64EffAddr = (int8_t)u32Disp; break;
4558 case 2: i64EffAddr = (int32_t)u32Disp; break;
4559 default: AssertFailed();
4560 }
4561
4562 /* Get the register (or SIB) value. */
4563 uint8_t idxGstRegBase = UINT8_MAX;
4564 uint8_t idxGstRegIndex = UINT8_MAX;
4565 uint8_t cShiftIndex = 0;
4566 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
4567 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
4568 else /* SIB: */
4569 {
4570 /* index w/ scaling. */
4571 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4572 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4573 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
4574 if (idxGstRegIndex == 4)
4575 {
4576 /* no index */
4577 cShiftIndex = 0;
4578 idxGstRegIndex = UINT8_MAX;
4579 }
4580
4581 /* base */
4582 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
4583 if (idxGstRegBase == 4)
4584 {
4585 /* pop [rsp] hack */
4586 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
4587 }
4588 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
4589 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
4590 {
4591 /* mod=0 and base=5 -> disp32, no base reg. */
4592 Assert(i64EffAddr == 0);
4593 i64EffAddr = (int32_t)u32Disp;
4594 idxGstRegBase = UINT8_MAX;
4595 }
4596 }
4597
4598 /*
4599 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
4600 * the start of the function.
4601 */
4602 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4603 {
4604 if (f64Bit)
4605 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
4606 else
4607 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
4608 return off;
4609 }
4610
4611 /*
4612 * Now emit code that calculates:
4613 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4614 * or if !f64Bit:
4615 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4616 */
4617 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4618 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
4619 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4620 kIemNativeGstRegUse_ReadOnly);
4621 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
4622 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4623 kIemNativeGstRegUse_ReadOnly);
4624
4625 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
4626 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
4627 {
4628 idxRegBase = idxRegIndex;
4629 idxRegIndex = UINT8_MAX;
4630 }
4631
4632#ifdef RT_ARCH_AMD64
4633 uint8_t bFinalAdj;
4634 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
4635 bFinalAdj = 0; /* likely */
4636 else
4637 {
4638 /* pop [rsp] with a problematic disp32 value. Split out the
4639 RSP offset and add it separately afterwards (bFinalAdj). */
4640 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
4641 Assert(idxGstRegBase == X86_GREG_xSP);
4642 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
4643 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
4644 Assert(bFinalAdj != 0);
4645 i64EffAddr -= bFinalAdj;
4646 Assert((int32_t)i64EffAddr == i64EffAddr);
4647 }
4648 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
4649//pReNative->pInstrBuf[off++] = 0xcc;
4650
4651 if (idxRegIndex == UINT8_MAX)
4652 {
4653 if (u32EffAddr == 0)
4654 {
4655 /* mov ret, base */
4656 if (f64Bit)
4657 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
4658 else
4659 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4660 }
4661 else
4662 {
4663 /* lea ret, [base + disp32] */
4664 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4665 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4666 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
4667 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4668 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4669 | (f64Bit ? X86_OP_REX_W : 0);
4670 pbCodeBuf[off++] = 0x8d;
4671 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4672 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4673 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
4674 else
4675 {
4676 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4677 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4678 }
4679 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4680 if (bMod == X86_MOD_MEM4)
4681 {
4682 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4683 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4684 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4685 }
4686 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4687 }
4688 }
4689 else
4690 {
4691 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4692 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4693 if (idxRegBase == UINT8_MAX)
4694 {
4695 /* lea ret, [(index64 << cShiftIndex) + disp32] */
4696 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
4697 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4698 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
4699 | (f64Bit ? X86_OP_REX_W : 0);
4700 pbCodeBuf[off++] = 0x8d;
4701 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
4702 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
4703 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4704 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4705 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4706 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4707 }
4708 else
4709 {
4710 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
4711 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4712 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4713 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4714 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
4715 | (f64Bit ? X86_OP_REX_W : 0);
4716 pbCodeBuf[off++] = 0x8d;
4717 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
4718 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4719 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4720 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
4721 if (bMod != X86_MOD_MEM0)
4722 {
4723 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4724 if (bMod == X86_MOD_MEM4)
4725 {
4726 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4727 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4728 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4729 }
4730 }
4731 }
4732 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4733 }
4734
4735 if (!bFinalAdj)
4736 { /* likely */ }
4737 else
4738 {
4739 Assert(f64Bit);
4740 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
4741 }
4742
4743#elif defined(RT_ARCH_ARM64)
4744 if (i64EffAddr == 0)
4745 {
4746 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4747 if (idxRegIndex == UINT8_MAX)
4748 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
4749 else if (idxRegBase != UINT8_MAX)
4750 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
4751 f64Bit, false /*fSetFlags*/, cShiftIndex);
4752 else
4753 {
4754 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
4755 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
4756 }
4757 }
4758 else
4759 {
4760 if (f64Bit)
4761 { /* likely */ }
4762 else
4763 i64EffAddr = (int32_t)i64EffAddr;
4764
4765 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
4766 {
4767 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4768 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
4769 }
4770 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
4771 {
4772 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4773 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
4774 }
4775 else
4776 {
4777 if (f64Bit)
4778 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
4779 else
4780 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
4781 if (idxRegBase != UINT8_MAX)
4782 {
4783 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4784 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
4785 }
4786 }
4787 if (idxRegIndex != UINT8_MAX)
4788 {
4789 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4790 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
4791 f64Bit, false /*fSetFlags*/, cShiftIndex);
4792 }
4793 }
4794
4795#else
4796# error "port me"
4797#endif
4798
4799 if (idxRegIndex != UINT8_MAX)
4800 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4801 if (idxRegBase != UINT8_MAX)
4802 iemNativeRegFreeTmp(pReNative, idxRegBase);
4803 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4804 return off;
4805}
4806
4807
4808/*********************************************************************************************************************************
4809* Memory fetches and stores common *
4810*********************************************************************************************************************************/
4811
4812typedef enum IEMNATIVEMITMEMOP
4813{
4814 kIemNativeEmitMemOp_Store = 0,
4815 kIemNativeEmitMemOp_Fetch,
4816 kIemNativeEmitMemOp_Fetch_Zx_U16,
4817 kIemNativeEmitMemOp_Fetch_Zx_U32,
4818 kIemNativeEmitMemOp_Fetch_Zx_U64,
4819 kIemNativeEmitMemOp_Fetch_Sx_U16,
4820 kIemNativeEmitMemOp_Fetch_Sx_U32,
4821 kIemNativeEmitMemOp_Fetch_Sx_U64
4822} IEMNATIVEMITMEMOP;
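/* Note: the Fetch_Zx_* / Fetch_Sx_* variants zero- respectively sign-extend the loaded
   value into the wider destination variable of the corresponding IEM_MC_* macro. */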
4823
4824/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
4825 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
4826 * (with iSegReg = UINT8_MAX). */
4827DECL_INLINE_THROW(uint32_t)
4828iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
4829 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
4830 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
4831{
4832 /*
4833 * Assert sanity.
4834 */
4835 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
4836 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
4837 Assert( enmOp != kIemNativeEmitMemOp_Store
4838 || pVarValue->enmKind == kIemNativeVarKind_Immediate
4839 || pVarValue->enmKind == kIemNativeVarKind_Stack);
4840 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
4841 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
4842 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
4843 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
4844 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4845 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
4846#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4847 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
4848 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
4849#else
4850 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
4851#endif
4852 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
4853#ifdef VBOX_STRICT
4854 if (iSegReg == UINT8_MAX)
4855 {
4856 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
4857 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
4858 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
4859 switch (cbMem)
4860 {
4861 case 1:
4862 Assert( pfnFunction
4863 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
4864 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
4865 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
4866 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
4867 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
4868 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
4869 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
4870 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
4871 : UINT64_C(0xc000b000a0009000) ));
4872 break;
4873 case 2:
4874 Assert( pfnFunction
4875 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
4876 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
4877 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
4878 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
4879 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
4880 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
4881 : UINT64_C(0xc000b000a0009000) ));
4882 break;
4883 case 4:
4884 Assert( pfnFunction
4885 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
4886 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
4887 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
4888 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
4889 : UINT64_C(0xc000b000a0009000) ));
4890 break;
4891 case 8:
4892 Assert( pfnFunction
4893 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
4894 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
4895 : UINT64_C(0xc000b000a0009000) ));
4896 break;
4897#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4898 case sizeof(RTUINT128U):
4899 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
4900 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
4901 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
4902 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
4903 || ( enmOp == kIemNativeEmitMemOp_Store
4904 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
4905 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
4906 break;
4907 case sizeof(RTUINT256U):
4908 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
4909 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
4910 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
4911 || ( enmOp == kIemNativeEmitMemOp_Store
4912 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
4913 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
4914 break;
4915#endif
4916 }
4917 }
4918 else
4919 {
4920 Assert(iSegReg < 6);
4921 switch (cbMem)
4922 {
4923 case 1:
4924 Assert( pfnFunction
4925 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
4926 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
4927 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
4928 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
4929 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
4930 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
4931 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
4932 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
4933 : UINT64_C(0xc000b000a0009000) ));
4934 break;
4935 case 2:
4936 Assert( pfnFunction
4937 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
4938 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
4939 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
4940 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
4941 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
4942 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
4943 : UINT64_C(0xc000b000a0009000) ));
4944 break;
4945 case 4:
4946 Assert( pfnFunction
4947 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
4948 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
4949 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
4950 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
4951 : UINT64_C(0xc000b000a0009000) ));
4952 break;
4953 case 8:
4954 Assert( pfnFunction
4955 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
4956 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
4957 : UINT64_C(0xc000b000a0009000) ));
4958 break;
4959#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4960 case sizeof(RTUINT128U):
4961 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
4962 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
4963 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
4964 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
4965 || ( enmOp == kIemNativeEmitMemOp_Store
4966 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
4967 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
4968 break;
4969 case sizeof(RTUINT256U):
4970 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
4971 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
4972 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
4973 || ( enmOp == kIemNativeEmitMemOp_Store
4974 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
4975 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
4976 break;
4977#endif
4978 }
4979 }
4980#endif
4981
4982#ifdef VBOX_STRICT
4983 /*
4984 * Check that the fExec flags we've got make sense.
4985 */
4986 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
4987#endif
4988
4989 /*
4990 * To keep things simple we have to commit any pending writes first as we
4991 * may end up making calls.
4992 */
4993 /** @todo we could postpone this till we make the call and reload the
4994 * registers after returning from the call. Not sure if that's sensible or
4995 * not, though. */
4996#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
4997 off = iemNativeRegFlushPendingWrites(pReNative, off);
4998#else
4999 /* The program counter is treated differently for now. */
5000 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
5001#endif
5002
5003#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5004 /*
5005 * Move/spill/flush stuff out of call-volatile registers.
5006 * This is the easy way out. We could contain this to the tlb-miss branch
5007 * by saving and restoring active stuff here.
5008 */
5009 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
5010#endif
5011
5012 /*
5013 * Define labels and allocate the result register (trying for the return
5014 * register if we can).
5015 */
5016 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
5017#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5018 uint8_t idxRegValueFetch = UINT8_MAX;
5019
5020 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5021 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5022 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
5023 else
5024 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5025 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
5026 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
5027 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
5028#else
5029 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5030 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
5031 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
5032 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
5033#endif
5034 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
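/* TlbState decides up front whether an inline TLB lookup is feasible at all (fSkip) and
   keeps track of the host registers the lookup will touch, which is what the
   getRegsNotToSave / getActiveRegsWithShadows calls further down rely on. */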
5035
5036#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5037 uint8_t idxRegValueStore = UINT8_MAX;
5038
5039 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5040 idxRegValueStore = !TlbState.fSkip
5041 && enmOp == kIemNativeEmitMemOp_Store
5042 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5043 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5044 : UINT8_MAX;
5045 else
5046 idxRegValueStore = !TlbState.fSkip
5047 && enmOp == kIemNativeEmitMemOp_Store
5048 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5049 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5050 : UINT8_MAX;
5051
5052#else
5053 uint8_t const idxRegValueStore = !TlbState.fSkip
5054 && enmOp == kIemNativeEmitMemOp_Store
5055 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5056 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5057 : UINT8_MAX;
5058#endif
5059 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
5060 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
5061 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
5062 : UINT32_MAX;
5063
5064 /*
5065 * Jump to the TLB lookup code.
5066 */
5067 if (!TlbState.fSkip)
5068 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
5069
5070 /*
5071 * TlbMiss:
5072 *
5073 * Call helper to do the fetching.
5074 * We flush all guest register shadow copies here.
5075 */
5076 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
5077
5078#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5079 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5080#else
5081 RT_NOREF(idxInstr);
5082#endif
5083
5084#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5085 if (pReNative->Core.offPc)
5086 {
5087 /*
5088 * Update the program counter but restore it at the end of the TlbMiss branch.
5089 * This should allow delaying more program counter updates for the TlbLookup and hit paths
5090 * which are hopefully much more frequent, reducing the number of memory accesses.
5091 */
5092 /* Allocate a temporary PC register. */
5093 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5094
5095 /* Perform the addition and store the result. */
5096 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5097 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5098
5099 /* Free and flush the PC register. */
5100 iemNativeRegFreeTmp(pReNative, idxPcReg);
5101 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
5102 }
5103#endif
5104
5105#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5106 /* Save variables in volatile registers. */
5107 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
5108 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
5109 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
5110 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
5111#endif
5112
5113 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
5114 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5115#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5116 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5117 {
5118 /*
5119 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
5120 *
5121 * @note A host register was assigned to the variable for the TlbLookup case above
5122 * and must not be freed, or the value loaded into that register will not be synced
5123 * back further down the road because the variable no longer knows it had a register assigned.
5124 *
5125 * @note For loads it is not required to sync what is in the assigned register with the stack slot
5126 * as it will be overwritten anyway.
5127 */
5128 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
5129 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
5130 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
5131 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
5132 }
5133 else
5134#endif
5135 if (enmOp == kIemNativeEmitMemOp_Store)
5136 {
5137 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
5138 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
5139#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5140 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5141#else
5142 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInVolatileRegs*/);
5143 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
5144#endif
5145 }
5146
5147 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
5148 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
5149#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5150 fVolGregMask);
5151#else
5152 fVolGregMask, true /*fSpilledVarsInVolatileRegs*/);
5153#endif
5154
5155 if (iSegReg != UINT8_MAX)
5156 {
5157 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
5158 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
5159 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
5160 }
5161
5162 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
5163 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5164
5165 /* Done setting up parameters, make the call. */
5166 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
5167
5168 /*
5169 * Put the result in the right register if this is a fetch.
5170 */
5171 if (enmOp != kIemNativeEmitMemOp_Store)
5172 {
5173#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5174 if ( cbMem == sizeof(RTUINT128U)
5175 || cbMem == sizeof(RTUINT256U))
5176 {
5177 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
5178
5179 /* Sync the value on the stack with the host register assigned to the variable. */
5180 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
5181 }
5182 else
5183#endif
5184 {
5185 Assert(idxRegValueFetch == pVarValue->idxReg);
5186 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
5187 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
5188 }
5189 }
5190
5191#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5192 /* Restore variables and guest shadow registers to volatile registers. */
5193 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
5194 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
5195#endif
5196
5197#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5198 if (pReNative->Core.offPc)
5199 {
5200 /*
5201 * Time to restore the program counter to its original value.
5202 */
5203 /* Allocate a temporary PC register. */
5204 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5205
5206 /* Restore the original value. */
5207 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5208 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5209
5210 /* Free and flush the PC register. */
5211 iemNativeRegFreeTmp(pReNative, idxPcReg);
5212 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
5213 }
5214#endif
5215
5216#ifdef IEMNATIVE_WITH_TLB_LOOKUP
5217 if (!TlbState.fSkip)
5218 {
5219 /* end of TlbMiss - Jump to the done label. */
5220 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
5221 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
5222
5223 /*
5224 * TlbLookup:
5225 */
5226 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
5227 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
5228 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
5229
5230 /*
5231 * Emit code to do the actual storing / fetching.
5232 */
5233 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
5234# ifdef VBOX_WITH_STATISTICS
5235 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
5236 enmOp == kIemNativeEmitMemOp_Store
5237 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
5238 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
5239# endif
5240 switch (enmOp)
5241 {
5242 case kIemNativeEmitMemOp_Store:
5243 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
5244 {
5245 switch (cbMem)
5246 {
5247 case 1:
5248 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5249 break;
5250 case 2:
5251 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5252 break;
5253 case 4:
5254 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5255 break;
5256 case 8:
5257 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5258 break;
5259#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5260 case sizeof(RTUINT128U):
5261 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5262 break;
5263 case sizeof(RTUINT256U):
5264 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5265 break;
5266#endif
5267 default:
5268 AssertFailed();
5269 }
5270 }
5271 else
5272 {
5273 switch (cbMem)
5274 {
5275 case 1:
5276 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
5277 idxRegMemResult, TlbState.idxReg1);
5278 break;
5279 case 2:
5280 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
5281 idxRegMemResult, TlbState.idxReg1);
5282 break;
5283 case 4:
5284 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
5285 idxRegMemResult, TlbState.idxReg1);
5286 break;
5287 case 8:
5288 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
5289 idxRegMemResult, TlbState.idxReg1);
5290 break;
5291 default:
5292 AssertFailed();
5293 }
5294 }
5295 break;
5296
5297 case kIemNativeEmitMemOp_Fetch:
5298 case kIemNativeEmitMemOp_Fetch_Zx_U16:
5299 case kIemNativeEmitMemOp_Fetch_Zx_U32:
5300 case kIemNativeEmitMemOp_Fetch_Zx_U64:
5301 switch (cbMem)
5302 {
5303 case 1:
5304 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5305 break;
5306 case 2:
5307 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5308 break;
5309 case 4:
5310 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5311 break;
5312 case 8:
5313 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5314 break;
5315#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5316 case sizeof(RTUINT128U):
5317 /*
5318 * No need to sync back the register with the stack, this is done by the generic variable handling
5319 * code if there is a register assigned to a variable and the stack must be accessed.
5320 */
5321 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5322 break;
5323 case sizeof(RTUINT256U):
5324 /*
5325 * No need to sync back the register with the stack, this is done by the generic variable handling
5326 * code if there is a register assigned to a variable and the stack must be accessed.
5327 */
5328 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5329 break;
5330#endif
5331 default:
5332 AssertFailed();
5333 }
5334 break;
5335
5336 case kIemNativeEmitMemOp_Fetch_Sx_U16:
5337 Assert(cbMem == 1);
5338 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5339 break;
5340
5341 case kIemNativeEmitMemOp_Fetch_Sx_U32:
5342 Assert(cbMem == 1 || cbMem == 2);
5343 if (cbMem == 1)
5344 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5345 else
5346 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5347 break;
5348
5349 case kIemNativeEmitMemOp_Fetch_Sx_U64:
5350 switch (cbMem)
5351 {
5352 case 1:
5353 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5354 break;
5355 case 2:
5356 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5357 break;
5358 case 4:
5359 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5360 break;
5361 default:
5362 AssertFailed();
5363 }
5364 break;
5365
5366 default:
5367 AssertFailed();
5368 }
5369
5370 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
5371
5372 /*
5373 * TlbDone:
5374 */
5375 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
5376
5377 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
5378
5379# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5380 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
5381 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5382# endif
5383 }
5384#else
5385 RT_NOREF(fAlignMask, idxLabelTlbMiss);
5386#endif
5387
5388 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
5389 iemNativeVarRegisterRelease(pReNative, idxVarValue);
5390 return off;
5391}
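/*
 * Rough shape of the code emitted by iemNativeEmitMemFetchStoreDataCommon above when the inline
 * TLB lookup is not skipped (sketch for orientation only; when TlbState.fSkip is set only the
 * TlbMiss helper call path is emitted):
 *
 *          jmp     TlbLookup
 *      TlbMiss:
 *          <save volatile regs, load pVCpu/GCPtrMem/value arguments, call pfnFunction, restore>
 *          jmp     TlbDone
 *      TlbLookup:
 *          <inline TLB probe, branching back to TlbMiss on a miss>
 *          <inline load/store through idxRegMemResult>
 *      TlbDone:
 */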
5392
5393
5394
5395/*********************************************************************************************************************************
5396* Memory fetches (IEM_MEM_FETCH_XXX). *
5397*********************************************************************************************************************************/
5398
5399/* 8-bit segmented: */
5400#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
5401 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
5402 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5403 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
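/* For illustration: IEM_MC_FETCH_MEM_U8(u8Dst, X86_SREG_DS, GCPtrEffSrc), say, becomes a 1 byte
   data fetch with no alignment restriction (fAlignMask = 0), falling back on the
   iemNativeHlpMemFetchDataU8 helper on a TLB miss; the _ZX_/_SX_ variants below only differ in the
   extension mode and, for sign extension, the helper used. */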
5404
5405#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5406 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5407 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5408 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5409
5410#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5411 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5412 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5413 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5414
5415#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5416 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5417 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5418 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5419
5420#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5421 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5422 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5423 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5424
5425#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5426 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5427 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5428 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5429
5430#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5431 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5432 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5433 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5434
5435/* 16-bit segmented: */
5436#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5437 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5438 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5439 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5440
5441#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5442 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5443 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5444 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5445
5446#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5447 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5448 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5449 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5450
5451#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5452 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5453 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5454 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5455
5456#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5457 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5458 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5459 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5460
5461#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5462 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5463 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5464 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5465
5466
5467/* 32-bit segmented: */
5468#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5469 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5470 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5471 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5472
5473#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5474 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5475 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5476 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5477
5478#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5479 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5480 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5481 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5482
5483#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5484 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5485 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5486 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5487
5488AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
5489#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
5490 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
5491 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
5492 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5493
5494
5495/* 64-bit segmented: */
5496#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5497 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5498 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5499 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5500
5501AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
5502#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
5503 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
5504 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
5505 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5506
5507
5508/* 8-bit flat: */
5509#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
5510 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
5511 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5512 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5513
5514#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
5515 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5516 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5517 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5518
5519#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
5520 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5521 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5522 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5523
5524#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
5525 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5526 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5527 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5528
5529#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
5530 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5531 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5532 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5533
5534#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
5535 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5536 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5537 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5538
5539#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
5540 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5541 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5542 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5543
5544
5545/* 16-bit flat: */
5546#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
5547 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5548 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5549 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5550
5551#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
5552 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5553 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5554 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5555
5556#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
5557 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5558 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5559 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5560
5561#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
5562 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5563 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5564 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5565
5566#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
5567 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5568 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5569 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5570
5571#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
5572 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5573 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5574 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5575
5576/* 32-bit flat: */
5577#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
5578 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5579 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5580 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5581
5582#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
5583 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5584 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5585 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5586
5587#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
5588 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5589 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5590 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5591
5592#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
5593 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5594 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5595 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5596
5597#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
5598 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
5599 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
5600 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5601
5602
5603/* 64-bit flat: */
5604#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
5605 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5606 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5607 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
5608
5609#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
5610 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
5611 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
5612 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
5613
5614#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5615/* 128-bit segmented: */
5616#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
5617 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
5618 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5619 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
5620
5621#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
5622 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
5623 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5624 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
5625
5626AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
5627#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
5628 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, \
5629 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
5630 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
5631
5632#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
5633 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
5634 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5635 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
5636
5637/* 128-bit flat: */
5638#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
5639 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
5640 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5641 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
5642
5643#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
5644 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
5645 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5646 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
5647
5648#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
5649 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
5650 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
5651 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
5652
5653#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
5654 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
5655 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5656 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
5657
5658/* 256-bit segmented: */
5659#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
5660 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
5661 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5662 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
5663
5664#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
5665 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
5666 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5667 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
5668
5669
5670/* 256-bit flat: */
5671#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
5672 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
5673 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5674 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
5675
5676#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
5677 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
5678 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5679 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
5680#endif
5681
5682
5683/*********************************************************************************************************************************
5684* Memory stores (IEM_MEM_STORE_XXX). *
5685*********************************************************************************************************************************/
5686
5687#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
5688 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
5689 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
5690 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
5691
5692#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
5693 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
5694 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
5695 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
5696
5697#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
5698 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
5699 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
5700 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
5701
5702#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
5703 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
5704 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
5705 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
5706
5707
5708#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
5709 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
5710 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
5711 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
5712
5713#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
5714 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
5715 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
5716 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
5717
5718#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
5719 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
5720 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
5721 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
5722
5723#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
5724 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
5725 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
5726 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
5727
5728
5729#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
5730 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
5731 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
5732
5733#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
5734 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
5735 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
5736
5737#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
5738 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
5739 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
5740
5741#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
5742 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
5743 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
5744
5745
5746#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
5747 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
5748 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
5749
5750#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
5751 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
5752 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
5753
5754#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
5755 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
5756 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
5757
5758#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
5759 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
5760 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
5761
5762/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
5763 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
5764DECL_INLINE_THROW(uint32_t)
5765iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
5766 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
5767{
5768 /*
5769 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
5770 * to do the grunt work.
5771 */
5772 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
5773 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
5774 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
5775 pfnFunction, idxInstr);
5776 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
5777 return off;
5778}
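/* For illustration: IEM_MC_STORE_MEM_U16_CONST(X86_SREG_ES, GCPtrEffDst, 0xffff), say, lands here
   with cbMem = 2; a temporary immediate variable holding 0xffff is allocated, the regular store
   path runs with an alignment mask of cbMem - 1, and the temporary variable is freed again. */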
5779
5780
5781#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5782# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
5783 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
5784 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
5785 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
5786
5787# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
5788 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
5789 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
5790 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
5791
5792# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
5793 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
5794 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
5795 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
5796
5797# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
5798 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
5799 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
5800 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
5801
5802
5803# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
5804 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
5805 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
5806 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
5807
5808# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
5809 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
5810 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
5811 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
5812
5813# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
5814 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
5815 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
5816 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
5817
5818# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
5819 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
5820 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
5821 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
5822#endif
5823
5824
5825
5826/*********************************************************************************************************************************
5827* Stack Accesses. *
5828*********************************************************************************************************************************/
5829/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
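/* For illustration of the packing, using the IEM_MC_FLAT32_PUSH_U32_SREG parameters below
   (byte 0 = variable width in bits, byte 1 = flat stack pointer width, byte 2 = fSReg);
   these compile-time checks are purely illustrative: */
AssertCompile(RT_BYTE1(RT_MAKE_U32_FROM_U8(32, 32, 1, 0)) == 32);
AssertCompile(RT_BYTE2(RT_MAKE_U32_FROM_U8(32, 32, 1, 0)) == 32);
AssertCompile(RT_BYTE3(RT_MAKE_U32_FROM_U8(32, 32, 1, 0)) ==  1);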
5830#define IEM_MC_PUSH_U16(a_u16Value) \
5831 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
5832 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
5833#define IEM_MC_PUSH_U32(a_u32Value) \
5834 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
5835 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
5836#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
5837 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
5838 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
5839#define IEM_MC_PUSH_U64(a_u64Value) \
5840 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
5841 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
5842
5843#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
5844 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
5845 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
5846#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
5847 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
5848 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
5849#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
5850 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
5851 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
5852
5853#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
5854 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
5855 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
5856#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
5857 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
5858 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
5859
5860
5861DECL_FORCE_INLINE_THROW(uint32_t)
5862iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
5863{
5864 /* Use16BitSp: */
5865#ifdef RT_ARCH_AMD64
5866 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
5867 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
5868#else
5869 /* sub regeff, regrsp, #cbMem */
5870 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
5871 /* and regeff, regeff, #0xffff */
5872 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
5873 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
5874 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0. */
5875 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
5876#endif
5877 return off;
5878}
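/* Worked example: with SP=0x0000 and cbMem=2 the sequence above produces idxRegEffSp=0xfffe (the
   16-bit stack wraps around on the decrement) and updates only bits 15:0 of idxRegRsp to 0xfffe;
   the TLB lookup code then stores the value at SS.base + 0xfffe. */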
5879
5880
5881DECL_FORCE_INLINE(uint32_t)
5882iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
5883{
5884 /* Use32BitSp: */
5885 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
5886 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
5887 return off;
5888}
5889
5890
5891/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
5892DECL_INLINE_THROW(uint32_t)
5893iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
5894 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
5895{
5896 /*
5897 * Assert sanity.
5898 */
5899 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
5900 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
5901#ifdef VBOX_STRICT
5902 if (RT_BYTE2(cBitsVarAndFlat) != 0)
5903 {
5904 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
5905 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
5906 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
5907 Assert( pfnFunction
5908 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
5909 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
5910 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
5911 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
5912 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
5913 : UINT64_C(0xc000b000a0009000) ));
5914 }
5915 else
5916 Assert( pfnFunction
5917 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
5918 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
5919 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
5920 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
5921 : UINT64_C(0xc000b000a0009000) ));
5922#endif
5923
5924#ifdef VBOX_STRICT
5925 /*
5926 * Check that the fExec flags we've got make sense.
5927 */
5928 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
5929#endif
5930
5931 /*
5932 * To keep things simple we have to commit any pending writes first as we
5933 * may end up making calls.
5934 */
5935 /** @todo we could postpone this till we make the call and reload the
5936 * registers after returning from the call. Not sure if that's sensible or
5937 * not, though. */
5938 off = iemNativeRegFlushPendingWrites(pReNative, off);
5939
5940 /*
5941 * First we calculate the new RSP and the effective stack pointer value.
5942 * For 64-bit mode and flat 32-bit these two are the same.
5943 * (Code structure is very similar to that of PUSH)
5944 */
5945 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
5946 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
5947 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
5948 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
5949 ? cbMem : sizeof(uint16_t);
5950 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
5951 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
5952 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
5953 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
5954 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
5955 if (cBitsFlat != 0)
5956 {
5957 Assert(idxRegEffSp == idxRegRsp);
5958 Assert(cBitsFlat == 32 || cBitsFlat == 64);
5959 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
5960 if (cBitsFlat == 64)
5961 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
5962 else
5963 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
5964 }
5965 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
5966 {
5967 Assert(idxRegEffSp != idxRegRsp);
5968 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
5969 kIemNativeGstRegUse_ReadOnly);
5970#ifdef RT_ARCH_AMD64
5971 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5972#else
5973 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5974#endif
5975 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
5976 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
5977 offFixupJumpToUseOtherBitSp = off;
5978 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
5979 {
5980 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
5981 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
5982 }
5983 else
5984 {
5985 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
5986 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
5987 }
5988 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5989 }
5990 /* SpUpdateEnd: */
5991 uint32_t const offLabelSpUpdateEnd = off;
5992
5993 /*
5994 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
5995 * we're skipping lookup).
5996 */
5997 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
5998 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
5999 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6000 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
6001 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6002 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6003 : UINT32_MAX;
6004 uint8_t const idxRegValue = !TlbState.fSkip
6005 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6006 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
6007 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
6008 : UINT8_MAX;
6009 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
6010
6011
6012 if (!TlbState.fSkip)
6013 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6014 else
6015 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
6016
6017 /*
6018 * Use16BitSp:
6019 */
6020 if (cBitsFlat == 0)
6021 {
6022#ifdef RT_ARCH_AMD64
6023 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6024#else
6025 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6026#endif
6027 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
6028 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6029 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6030 else
6031 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6032 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
6033 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6034 }
6035
6036 /*
6037 * TlbMiss:
6038 *
6039 * Call helper to do the pushing.
6040 */
6041 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
6042
6043#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6044 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6045#else
6046 RT_NOREF(idxInstr);
6047#endif
6048
6049 /* Save variables in volatile registers. */
6050 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6051 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
6052 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
6053 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
6054 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6055
6056 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
6057 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
6058 {
6059 /* Swap them using ARG0 as temp register: */
6060 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
6061 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
6062 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
6063 }
6064 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
6065 {
6066 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
6067 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
6068 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6069
6070 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
6071 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
6072 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6073 }
6074 else
6075 {
6076 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
6077 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6078
6079 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
6080 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
6081 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
6082 }
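    /* Note on the ordering above: the helper takes the effective stack pointer in ARG1 and the value
       in ARG2. When the value currently sits in ARG1 and the effective SP in ARG2, loading either
       argument directly would clobber the other, hence the rotation through ARG0 in the first case
       (ARG0 = ARG1, ARG1 = ARG2, ARG2 = ARG0). */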
6083
6084 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6085 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6086
6087 /* Done setting up parameters, make the call. */
6088 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6089
6090 /* Restore variables and guest shadow registers to volatile registers. */
6091 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6092 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6093
6094#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6095 if (!TlbState.fSkip)
6096 {
6097 /* end of TlbMiss - Jump to the done label. */
6098 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6099 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6100
6101 /*
6102 * TlbLookup:
6103 */
6104 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
6105 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6106
6107 /*
6108 * Emit code to do the actual storing / fetching.
6109 */
6110 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
6111# ifdef VBOX_WITH_STATISTICS
6112 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6113 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
6114# endif
6115 if (idxRegValue != UINT8_MAX)
6116 {
6117 switch (cbMemAccess)
6118 {
6119 case 2:
6120 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6121 break;
6122 case 4:
6123 if (!fIsIntelSeg)
6124 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6125 else
6126 {
6127 /* Intel real mode segment push: the 10890XE adds the 2nd half of EFLAGS to a
6128 PUSH FS in real mode, so we have to try to emulate that here.
6129 We borrow the now unused idxReg1 from the TLB lookup code for this. */
6130 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
6131 kIemNativeGstReg_EFlags);
6132 if (idxRegEfl != UINT8_MAX)
6133 {
6134#ifdef RT_ARCH_AMD64
6135 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
6136 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
6137 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6138#else
6139 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
6140 off, TlbState.idxReg1, idxRegEfl,
6141 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6142#endif
6143 iemNativeRegFreeTmp(pReNative, idxRegEfl);
6144 }
6145 else
6146 {
6147 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
6148 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
6149 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
6150 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6151 }
6152 /* ASSUMES the upper half of idxRegValue is ZERO. */
6153 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
6154 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
6155 }
6156 break;
6157 case 8:
6158 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6159 break;
6160 default:
6161 AssertFailed();
6162 }
6163 }
6164 else
6165 {
6166 switch (cbMemAccess)
6167 {
6168 case 2:
6169 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
6170 idxRegMemResult, TlbState.idxReg1);
6171 break;
6172 case 4:
6173 Assert(!fIsSegReg);
6174 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
6175 idxRegMemResult, TlbState.idxReg1);
6176 break;
6177 case 8:
6178 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
6179 break;
6180 default:
6181 AssertFailed();
6182 }
6183 }
6184
6185 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6186 TlbState.freeRegsAndReleaseVars(pReNative);
6187
6188 /*
6189 * TlbDone:
6190 *
6191 * Commit the new RSP value.
6192 */
6193 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6194 }
6195#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
6196
6197 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
6198 iemNativeRegFreeTmp(pReNative, idxRegRsp);
6199 if (idxRegEffSp != idxRegRsp)
6200 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
6201
6202 /* The value variable is implicitly flushed. */
6203 if (idxRegValue != UINT8_MAX)
6204 iemNativeVarRegisterRelease(pReNative, idxVarValue);
6205 iemNativeVarFreeLocal(pReNative, idxVarValue);
6206
6207 return off;
6208}
6209
6210
6211
6212/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
6213#define IEM_MC_POP_GREG_U16(a_iGReg) \
6214 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
6215 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
6216#define IEM_MC_POP_GREG_U32(a_iGReg) \
6217 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
6218 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
6219#define IEM_MC_POP_GREG_U64(a_iGReg) \
6220 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
6221 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
6222
6223#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
6224 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
6225 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
6226#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
6227 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
6228 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
6229
6230#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
6231 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
6232 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
6233#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
6234 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
6235 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
6236
6237
6238DECL_FORCE_INLINE_THROW(uint32_t)
6239iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
6240 uint8_t idxRegTmp)
6241{
6242 /* Use16BitSp: */
6243#ifdef RT_ARCH_AMD64
6244 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6245 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
6246 RT_NOREF(idxRegTmp);
6247#else
6248 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
6249 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
6250 /* add tmp, regrsp, #cbMem */
6251 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
6252 /* and tmp, tmp, #0xffff */
6253 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6254 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
6255 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
6256 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
6257#endif
6258 return off;
6259}
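/* Worked example: with SP=0xfffe and cbMem=2 the sequence above produces idxRegEffSp=0xfffe as the
   load address and writes 0x0000 back into bits 15:0 of idxRegRsp, i.e. the 16-bit stack pointer
   wraps around on the increment just like the push helper above wraps on the decrement. */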
6260
6261
6262DECL_FORCE_INLINE(uint32_t)
6263iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6264{
6265 /* Use32BitSp: */
6266 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6267 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
6268 return off;
6269}
6270
6271
6272/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
6273DECL_INLINE_THROW(uint32_t)
6274iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
6275 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
6276{
6277 /*
6278 * Assert sanity.
6279 */
6280 Assert(idxGReg < 16);
6281#ifdef VBOX_STRICT
6282 if (RT_BYTE2(cBitsVarAndFlat) != 0)
6283 {
6284 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6285 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6286 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6287 Assert( pfnFunction
6288 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
6289 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
6290 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
6291 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
6292 : UINT64_C(0xc000b000a0009000) ));
6293 }
6294 else
6295 Assert( pfnFunction
6296 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
6297 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
6298 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
6299 : UINT64_C(0xc000b000a0009000) ));
6300#endif
6301
6302#ifdef VBOX_STRICT
6303 /*
6304 * Check that the fExec flags we've got make sense.
6305 */
6306 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6307#endif
6308
6309 /*
6310 * To keep things simple we have to commit any pending writes first as we
6311 * may end up making calls.
6312 */
6313 off = iemNativeRegFlushPendingWrites(pReNative, off);
6314
6315 /*
6316 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
6317 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
6318 * directly as the effective stack pointer.
6319 * (Code structure is very similar to that of PUSH)
6320 */
6321 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
6322 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
6323 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
6324 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
6325 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
6326 /** @todo can do a better job picking the register here. For cbMem >= 4 this
6327 * will be the resulting register value. */
6328 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
6329
6330 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
6331 if (cBitsFlat != 0)
6332 {
6333 Assert(idxRegEffSp == idxRegRsp);
6334 Assert(cBitsFlat == 32 || cBitsFlat == 64);
6335 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
6336 }
6337 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
6338 {
6339 Assert(idxRegEffSp != idxRegRsp);
6340 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
6341 kIemNativeGstRegUse_ReadOnly);
6342#ifdef RT_ARCH_AMD64
6343 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6344#else
6345 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6346#endif
6347 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
6348 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
6349 offFixupJumpToUseOtherBitSp = off;
6350 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6351 {
6352/** @todo can skip idxRegRsp updating when popping ESP. */
6353 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
6354 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6355 }
6356 else
6357 {
6358 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
6359 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
6360 }
6361 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6362 }
6363 /* SpUpdateEnd: */
6364 uint32_t const offLabelSpUpdateEnd = off;
6365
6366 /*
6367 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
6368 * we're skipping lookup).
6369 */
6370 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
6371 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
6372 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6373 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
6374 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6375 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6376 : UINT32_MAX;
6377
6378 if (!TlbState.fSkip)
6379 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6380 else
6381 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
6382
6383 /*
6384 * Use16BitSp:
6385 */
6386 if (cBitsFlat == 0)
6387 {
6388#ifdef RT_ARCH_AMD64
6389 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6390#else
6391 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6392#endif
6393 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
6394 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6395 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
6396 else
6397 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6398 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
6399 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6400 }
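    /*
     * Rough shape of what has been emitted so far for the non-FLAT case
     * (sketch only; the exact targets are resolved via the fixups above):
     *
     *          test   ss.attr, X86DESCATTR_D
     *          jcc    OtherBitSp           ; je or jne depending on the CPU mode
     *          <SP update for the current mode's default stack width>
     *      SpUpdateEnd:
     *          jmp    TlbLookup            ; or TlbMiss when the lookup is skipped
     *      OtherBitSp:
     *          <SP update for the other stack width>
     *          jmp    SpUpdateEnd
     *      TlbMiss:
     *          ...
     */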
6401
6402 /*
6403 * TlbMiss:
6404 *
6405     * Call helper to do the popping.
6406 */
6407 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
6408
6409#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6410 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6411#else
6412 RT_NOREF(idxInstr);
6413#endif
6414
6415 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6416 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
6417 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
6418 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6419
6420
6421 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
6422 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
6423 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6424
6425 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6426 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6427
6428 /* Done setting up parameters, make the call. */
6429 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6430
6431 /* Move the return register content to idxRegMemResult. */
6432 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
6433 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
6434
6435 /* Restore variables and guest shadow registers to volatile registers. */
6436 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6437 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6438
6439#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6440 if (!TlbState.fSkip)
6441 {
6442 /* end of TlbMiss - Jump to the done label. */
6443 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6444 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6445
6446 /*
6447 * TlbLookup:
6448 */
6449 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
6450 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6451
6452 /*
6453         * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
6454 */
6455 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6456# ifdef VBOX_WITH_STATISTICS
6457 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6458 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
6459# endif
6460 switch (cbMem)
6461 {
6462 case 2:
6463 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6464 break;
6465 case 4:
6466 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6467 break;
6468 case 8:
6469 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6470 break;
6471 default:
6472 AssertFailed();
6473 }
6474
6475 TlbState.freeRegsAndReleaseVars(pReNative);
6476
6477 /*
6478 * TlbDone:
6479 *
6480         * Set the new RSP value (FLAT accesses need to calculate it first) and
6481 * commit the popped register value.
6482 */
6483 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6484 }
6485#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
6486
6487 if (idxGReg != X86_GREG_xSP)
6488 {
6489 /* Set the register. */
6490 if (cbMem >= sizeof(uint32_t))
6491 {
6492#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6493 AssertMsg( pReNative->idxCurCall == 0
6494 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
6495 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
6496#endif
6497 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
6498 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
6499 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6500 }
6501 else
6502 {
6503 Assert(cbMem == sizeof(uint16_t));
6504 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
6505 kIemNativeGstRegUse_ForUpdate);
6506 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
6507 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6508 iemNativeRegFreeTmp(pReNative, idxRegDst);
6509 }
6510
6511 /* Complete RSP calculation for FLAT mode. */
6512 if (idxRegEffSp == idxRegRsp)
6513 {
6514 if (cBitsFlat == 64)
6515 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
6516 else
6517 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
6518 }
6519 }
6520 else
6521 {
6522        /* We're popping RSP, ESP or SP. Only the 16-bit SP case needs a bit of extra work, of course. */
6523 if (cbMem == sizeof(uint64_t))
6524 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
6525 else if (cbMem == sizeof(uint32_t))
6526 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
6527 else
6528 {
6529 if (idxRegEffSp == idxRegRsp)
6530 {
6531 if (cBitsFlat == 64)
6532 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
6533 else
6534 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
6535 }
6536 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
6537 }
6538 }
6539 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
6540
6541 iemNativeRegFreeTmp(pReNative, idxRegRsp);
6542 if (idxRegEffSp != idxRegRsp)
6543 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
6544 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6545
6546 return off;
6547}
6548
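/*
 * Usage sketch (illustration only, argument names are made up): a 64-bit
 * 'pop rax' would typically come through IEM_MC_FLAT64_POP_GREG_U64 above,
 * which boils down to
 *
 *      off = iemNativeEmitStackPopGReg(pReNative, off, X86_GREG_xAX,
 *                                      RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
 *                                      (uintptr_t)iemNativeHlpStackFlatFetchU64,
 *                                      pCallEntry->idxInstr);
 *
 * i.e. a TLB lookup on the guest address in RSP, an inline 8-byte load on the
 * hit path, RSP += 8, and a call to the fetch helper only on the TlbMiss path.
 */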
6549
6550
6551/*********************************************************************************************************************************
6552* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
6553*********************************************************************************************************************************/
6554
6555#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6556 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6557 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
6558 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
6559
6560#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6561 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6562 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
6563 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
6564
6565#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6566 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6567 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
6568 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
6569
6570#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6571 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6572 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
6573 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
6574
6575
6576#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6577 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6578 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6579 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
6580
6581#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6582 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6583 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6584 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
6585
6586#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6587 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6588 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6589 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
6590
6591#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6592 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6593 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6594 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
6595
6596#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6597 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
6598 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6599 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
6600
6601
6602#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6603 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6604 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6605 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
6606
6607#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6608 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6609 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6610 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
6611
6612#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6613 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6614 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6615 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
6616
6617#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6618 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6619 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6620 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
6621
6622#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6623 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
6624 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6625 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
6626
6627
6628#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6629 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6630 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6631 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
6632
6633#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6634 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6635 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6636 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
6637#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6638 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6639 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6640 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
6641
6642#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6643 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6644 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6645 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
6646
6647#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6648 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
6649 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6650 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
6651
6652
6653#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6654 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
6655 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6656 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
6657
6658#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6659 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
6660 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
6661 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
6662
6663
6664#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6665 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6666 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6667 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
6668
6669#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6670 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6671 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6672 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
6673
6674#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6675 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6676 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6677 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
6678
6679#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6680 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6681 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6682 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
6683
6684
6685
6686#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6687 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6688 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
6689 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
6690
6691#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6692 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6693 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
6694 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
6695
6696#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6697 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6698 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
6699 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
6700
6701#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6702 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6703 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
6704 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
6705
6706
6707#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
6708 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6709 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6710 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
6711
6712#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
6713 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6714 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6715 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
6716
6717#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
6718 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6719 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6720 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
6721
6722#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
6723 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6724 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6725 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
6726
6727#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
6728 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
6729 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6730 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
6731
6732
6733#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
6734 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6735 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6736 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
6737
6738#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
6739 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6740 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6741 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
6742
6743#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
6744 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6745 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6746 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
6747
6748#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
6749 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6750 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6751 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
6752
6753#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
6754 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
6755 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6756 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
6757
6758
6759#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
6760 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
6761 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6762 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
6763
6764#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
6765 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
6766 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6767 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
6768
6769#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
6770 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
6771 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6772 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
6773
6774#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
6775 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
6776 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6777 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
6778
6779#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
6780 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
6781 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6782 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
6783
6784
6785#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
6786 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
6787 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6788 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
6789
6790#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
6791 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
6792 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
6793 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
6794
6795
6796#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
6797 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
6798 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6799 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
6800
6801#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
6802 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
6803 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6804 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
6805
6806#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
6807 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
6808 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6809 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
6810
6811#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
6812 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
6813 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6814 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
6815
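/*
 * Note on the pattern above (illustration only): the segmented variants pass
 * the segment register index straight through, while the IEM_MC_MEM_FLAT_MAP_*
 * forms pass UINT8_MAX as iSegReg to select the flat helpers.  The alignment
 * mask is simply sizeof(type) - 1, so for instance IEM_MC_MEM_MAP_U32_RW
 * expands to
 *
 *      off = iemNativeEmitMemMapCommon(pReNative, off, pu32Mem, bUnmapInfo,
 *                                      iSeg, GCPtrMem, sizeof(uint32_t),
 *                                      IEM_ACCESS_DATA_RW, 3 /*fAlignMask*/,
 *                                      (uintptr_t)iemNativeHlpMemMapDataU32Rw,
 *                                      pCallEntry->idxInstr);
 */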
6816
6817DECL_INLINE_THROW(uint32_t)
6818iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
6819 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
6820 uintptr_t pfnFunction, uint8_t idxInstr)
6821{
6822 /*
6823 * Assert sanity.
6824 */
6825 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
6826 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
6827 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
6828 && pVarMem->cbVar == sizeof(void *),
6829 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6830
6831 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
6832 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
6833 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
6834 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
6835 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6836
6837 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
6838 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
6839 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
6840 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
6841 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6842
6843 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
6844
6845 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6846
6847#ifdef VBOX_STRICT
6848# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
6849 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
6850 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
6851 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
6852 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
6853# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
6854 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
6855 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
6856 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
6857
6858 if (iSegReg == UINT8_MAX)
6859 {
6860 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6861 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6862 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6863 switch (cbMem)
6864 {
6865 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
6866 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
6867 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
6868 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
6869 case 10:
6870 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
6871 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
6872 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
6873 break;
6874 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
6875# if 0
6876 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
6877 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
6878# endif
6879 default: AssertFailed(); break;
6880 }
6881 }
6882 else
6883 {
6884 Assert(iSegReg < 6);
6885 switch (cbMem)
6886 {
6887 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
6888 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
6889 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
6890 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
6891 case 10:
6892 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
6893 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
6894 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
6895 break;
6896 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
6897# if 0
6898 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
6899 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
6900# endif
6901 default: AssertFailed(); break;
6902 }
6903 }
6904# undef IEM_MAP_HLP_FN
6905# undef IEM_MAP_HLP_FN_NO_AT
6906#endif
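    /*
     * Example of what the strict-build check above verifies (illustration
     * only): for a 4 byte read-write mapping fAccess has IEM_ACCESS_TYPE_READ
     * and IEM_ACCESS_TYPE_WRITE set but not IEM_ACCESS_ATOMIC, so
     * IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32) resolves to
     * (uintptr_t)iemNativeHlpMemMapDataU32Rw, which is exactly the pfnFunction
     * the IEM_MC_MEM_MAP_U32_RW macro passes in.
     */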
6907
6908#ifdef VBOX_STRICT
6909 /*
6910 * Check that the fExec flags we've got make sense.
6911 */
6912 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6913#endif
6914
6915 /*
6916 * To keep things simple we have to commit any pending writes first as we
6917 * may end up making calls.
6918 */
6919 off = iemNativeRegFlushPendingWrites(pReNative, off);
6920
6921#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6922 /*
6923 * Move/spill/flush stuff out of call-volatile registers.
6924 * This is the easy way out. We could contain this to the tlb-miss branch
6925 * by saving and restoring active stuff here.
6926 */
6927 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
6928 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
6929#endif
6930
6931 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
6932 while the tlb-miss codepath will temporarily put it on the stack.
6933       Set the type to stack here so we don't need to do it twice below. */
6934 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
6935 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
6936 /** @todo use a tmp register from TlbState, since they'll be free after tlb
6937 * lookup is done. */
6938
6939 /*
6940 * Define labels and allocate the result register (trying for the return
6941 * register if we can).
6942 */
6943 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6944 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6945 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
6946 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
6947 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
6948 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6949 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6950 : UINT32_MAX;
6951//off=iemNativeEmitBrk(pReNative, off, 0);
6952 /*
6953 * Jump to the TLB lookup code.
6954 */
6955 if (!TlbState.fSkip)
6956 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6957
6958 /*
6959 * TlbMiss:
6960 *
6961 * Call helper to do the fetching.
6962 * We flush all guest register shadow copies here.
6963 */
6964 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
6965
6966#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6967 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6968#else
6969 RT_NOREF(idxInstr);
6970#endif
6971
6972#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6973 /* Save variables in volatile registers. */
6974 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
6975 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6976#endif
6977
6978 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
6979 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
6980#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6981 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
6982#else
6983 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6984#endif
6985
6986 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
6987 if (iSegReg != UINT8_MAX)
6988 {
6989 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
6990 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
6991 }
6992
6993 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
6994 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
6995 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
6996
6997 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6998 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6999
7000 /* Done setting up parameters, make the call. */
7001 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7002
7003 /*
7004 * Put the output in the right registers.
7005 */
7006 Assert(idxRegMemResult == pVarMem->idxReg);
7007 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
7008 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
7009
7010#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7011 /* Restore variables and guest shadow registers to volatile registers. */
7012 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7013 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7014#endif
7015
7016 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
7017 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
7018
7019#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7020 if (!TlbState.fSkip)
7021 {
7022        /* end of TlbMiss - Jump to the done label. */
7023 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7024 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7025
7026 /*
7027 * TlbLookup:
7028 */
7029 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
7030 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7031# ifdef VBOX_WITH_STATISTICS
7032 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
7033 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
7034# endif
7035
7036 /* [idxVarUnmapInfo] = 0; */
7037 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
7038
7039 /*
7040 * TlbDone:
7041 */
7042 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7043
7044 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7045
7046# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7047 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7048 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7049# endif
7050 }
7051#else
7052 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
7053#endif
7054
7055 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
7056 iemNativeVarRegisterRelease(pReNative, idxVarMem);
7057
7058 return off;
7059}
7060
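/*
 * Usage sketch (illustration only; variable names are made up): a read-modify-
 * write MC block pairs the map and commit emitters along these lines:
 *
 *      IEM_MC_MEM_MAP_U32_RW(pu32Dst, bUnmapInfo, iSeg, GCPtrEff);
 *      ... operate on the mapped guest dword through *pu32Dst ...
 *      IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
 *
 * iemNativeEmitMemMapCommon() above produces the host pointer / bUnmapInfo
 * pair, and iemNativeEmitMemCommitAndUnmap() below emits the conditional
 * unmap call.
 */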
7061
7062#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
7063 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
7064 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
7065
7066#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
7067 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
7068 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
7069
7070#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
7071 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
7072 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
7073
7074#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
7075 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
7076 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
7077
7078DECL_INLINE_THROW(uint32_t)
7079iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
7080 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
7081{
7082 /*
7083 * Assert sanity.
7084 */
7085 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
7086#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
7087 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
7088#endif
7089 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
7090 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
7091 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
7092#ifdef VBOX_STRICT
7093 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
7094 {
7095 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
7096 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
7097 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
7098 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
7099 case IEM_ACCESS_TYPE_WRITE:
7100 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
7101 case IEM_ACCESS_TYPE_READ:
7102 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
7103 default: AssertFailed();
7104 }
7105#else
7106 RT_NOREF(fAccess);
7107#endif
7108
7109 /*
7110 * To keep things simple we have to commit any pending writes first as we
7111 * may end up making calls (there shouldn't be any at this point, so this
7112 * is just for consistency).
7113 */
7114 /** @todo we could postpone this till we make the call and reload the
7115 * registers after returning from the call. Not sure if that's sensible or
7116 * not, though. */
7117 off = iemNativeRegFlushPendingWrites(pReNative, off);
7118
7119 /*
7120 * Move/spill/flush stuff out of call-volatile registers.
7121 *
7122 * We exclude any register holding the bUnmapInfo variable, as we'll be
7123 * checking it after returning from the call and will free it afterwards.
7124 */
7125 /** @todo save+restore active registers and maybe guest shadows in miss
7126 * scenario. */
7127 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
7128 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
7129
7130 /*
7131 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
7132 * to call the unmap helper function.
7133 *
7134     * The likelihood of it being zero is higher than for the TLB hit when doing
7135     * the mapping, as a TLB miss for a well aligned and unproblematic memory
7136 * access should also end up with a mapping that won't need special unmapping.
7137 */
7138 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
7139 * should speed up things for the pure interpreter as well when TLBs
7140 * are enabled. */
7141#ifdef RT_ARCH_AMD64
7142 if (pVarUnmapInfo->idxReg == UINT8_MAX)
7143 {
7144 /* test byte [rbp - xxx], 0ffh */
7145 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7146 pbCodeBuf[off++] = 0xf6;
7147 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
7148 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
7149 pbCodeBuf[off++] = 0xff;
7150 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7151 }
7152 else
7153#endif
7154 {
7155 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
7156 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
7157 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
7158 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
7159 }
7160 uint32_t const offJmpFixup = off;
7161 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
7162
7163 /*
7164 * Call the unmap helper function.
7165 */
7166#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
7167 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7168#else
7169 RT_NOREF(idxInstr);
7170#endif
7171
7172 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
7173 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
7174 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7175
7176 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7177 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7178
7179 /* Done setting up parameters, make the call. */
7180 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7181
7182    /* The bUnmapInfo variable is implicitly freed by these MCs. */
7183 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
7184
7185 /*
7186 * Done, just fixup the jump for the non-call case.
7187 */
7188 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
7189
7190 return off;
7191}
7192
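/*
 * The net effect of the emitter above is roughly (sketch only):
 *
 *      if (bUnmapInfo != 0)    // test of the register/stack slot + jz
 *          iemNativeHlpMemCommitAndUnmap<Atomic|Rw|Wo|Ro>(pVCpu, bUnmapInfo);
 *
 * so the helper call is skipped entirely for the common TLB-hit mappings,
 * where the map emitter stored zero into bUnmapInfo.
 */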
7193
7194
7195/*********************************************************************************************************************************
7196* State and Exceptions *
7197*********************************************************************************************************************************/
7198
7199#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7200#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7201
7202#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7203#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7204#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7205
7206#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7207#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7208#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7209
7210
7211DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
7212{
7213 /** @todo this needs a lot more work later. */
7214 RT_NOREF(pReNative, fForChange);
7215 return off;
7216}
7217
7218
7219
7220/*********************************************************************************************************************************
7221* Emitters for FPU related operations. *
7222*********************************************************************************************************************************/
7223
7224#define IEM_MC_FETCH_FCW(a_u16Fcw) \
7225 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
7226
7227/** Emits code for IEM_MC_FETCH_FCW. */
7228DECL_INLINE_THROW(uint32_t)
7229iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
7230{
7231 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7232 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7233
7234 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7235
7236 /* Allocate a temporary FCW register. */
7237 /** @todo eliminate extra register */
7238 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
7239 kIemNativeGstRegUse_ReadOnly);
7240
7241 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
7242
7243 /* Free but don't flush the FCW register. */
7244 iemNativeRegFreeTmp(pReNative, idxFcwReg);
7245 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7246
7247 return off;
7248}
7249
7250
7251#define IEM_MC_FETCH_FSW(a_u16Fsw) \
7252 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
7253
7254/** Emits code for IEM_MC_FETCH_FSW. */
7255DECL_INLINE_THROW(uint32_t)
7256iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
7257{
7258 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7259 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7260
7261 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
7262 /* Allocate a temporary FSW register. */
7263 /** @todo eliminate extra register */
7264 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
7265 kIemNativeGstRegUse_ReadOnly);
7266
7267 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
7268
7269 /* Free but don't flush the FSW register. */
7270 iemNativeRegFreeTmp(pReNative, idxFswReg);
7271 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7272
7273 return off;
7274}
7275
7276
7277
7278#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7279
7280
7281/*********************************************************************************************************************************
7282* Emitters for SSE/AVX specific operations. *
7283*********************************************************************************************************************************/
7284
7285#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
7286 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
7287
7288/** Emits code for IEM_MC_COPY_XREG_U128. */
7289DECL_INLINE_THROW(uint32_t)
7290iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
7291{
7292    /* This is a nop if the source and destination registers are the same. */
7293 if (iXRegDst != iXRegSrc)
7294 {
7295 /* Allocate destination and source register. */
7296 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
7297 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
7298 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
7299 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7300
7301 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
7302 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXRegDst);
7303 /* We don't need to write everything back here as the destination is marked as dirty and will be flushed automatically. */
7304
7305 /* Free but don't flush the source and destination register. */
7306 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7307 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7308 }
7309
7310 return off;
7311}
7312
7313
7314#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
7315 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
7316
7317/** Emits code for IEM_MC_FETCH_XREG_U128. */
7318DECL_INLINE_THROW(uint32_t)
7319iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
7320{
7321 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7322 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
7323
7324 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7325 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7326
7327 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
7328
7329 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
7330
7331 /* Free but don't flush the source register. */
7332 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7333 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
7334
7335 return off;
7336}
7337
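/*
 * Note (illustration only): the narrower IEM_MC_FETCH_XREG_U64/U32/U16/U8
 * variants below follow the same pattern but land the selected element in a
 * general purpose host register instead of a SIMD register.  For instance
 *
 *      IEM_MC_FETCH_XREG_U64(u64Tmp, 1 /*XMM1*/, 0 /*iQWord*/)
 *
 * allocates the XMM1 shadow register read-only and emits a lane-to-GPR move
 * of qword 0 into the host register backing the (made up) u64Tmp variable.
 */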
7338
7339#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
7340 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
7341
7342/** Emits code for IEM_MC_FETCH_XREG_U64. */
7343DECL_INLINE_THROW(uint32_t)
7344iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
7345{
7346 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7347 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7348
7349 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7350 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7351
7352 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7353 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7354
7355 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
7356
7357 /* Free but don't flush the source register. */
7358 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7359 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7360
7361 return off;
7362}
7363
7364
7365#define IEM_MC_FETCH_XREG_U32(a_u64Value, a_iXReg, a_iDWord) \
7366 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u64Value, a_iXReg, a_iDWord)
7367
7368/** Emits code for IEM_MC_FETCH_XREG_U32. */
7369DECL_INLINE_THROW(uint32_t)
7370iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
7371{
7372 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7373 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7374
7375 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7376 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7377
7378 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7379 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7380
7381 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
7382
7383 /* Free but don't flush the source register. */
7384 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7385 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7386
7387 return off;
7388}
7389
7390
7391#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
7392 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
7393
7394/** Emits code for IEM_MC_FETCH_XREG_U16. */
7395DECL_INLINE_THROW(uint32_t)
7396iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
7397{
7398 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7399 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7400
7401 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7402 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7403
7404 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7405 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7406
7407 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
7408
7409 /* Free but don't flush the source register. */
7410 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7411 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7412
7413 return off;
7414}
7415
7416
7417#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
7418 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
7419
7420/** Emits code for IEM_MC_FETCH_XREG_U8. */
7421DECL_INLINE_THROW(uint32_t)
7422iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
7423{
7424 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7425 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
7426
7427 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7428 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7429
7430 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7431 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7432
7433 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
7434
7435 /* Free but don't flush the source register. */
7436 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7437 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7438
7439 return off;
7440}
7441
7442
7443#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
7444 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
7445
7446AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7447#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
7448 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
7449
7450
7451/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
7452DECL_INLINE_THROW(uint32_t)
7453iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7454{
7455 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7456 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
7457
7458 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7459 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
7460
7461 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
7462
7463 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
7464 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7465
7466 /* Free but don't flush the source register. */
7467 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7468 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
7469
7470 return off;
7471}
7472
7473
7474#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
7475 off = iemNativeEmitSimdStoreXregU64(pReNative, off, a_iXReg, a_u64Value, a_iQWord)
7476
7477/** Emits code for IEM_MC_STORE_XREG_U64. */
7478DECL_INLINE_THROW(uint32_t)
7479iemNativeEmitSimdStoreXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar, uint8_t iQWord)
7480{
7481 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7482 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7483
7484 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7485 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7486
7487 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7488
7489 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQWord);
7490 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7491
7492 /* Free but don't flush the source register. */
7493 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7494 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7495
7496 return off;
7497}
7498
7499
7500#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
7501 off = iemNativeEmitSimdStoreXregU32(pReNative, off, a_iXReg, a_u32Value, a_iDWord)
7502
7503/** Emits code for IEM_MC_STORE_XREG_U32. */
7504DECL_INLINE_THROW(uint32_t)
7505iemNativeEmitSimdStoreXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar, uint8_t iDWord)
7506{
7507 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7508 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7509
7510 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7511 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7512
7513 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7514
7515 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iDWord);
7516 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7517
7518 /* Free but don't flush the source register. */
7519 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7520 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7521
7522 return off;
7523}
7524
7525
7526#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
7527 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
7528
7529/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
7530DECL_INLINE_THROW(uint32_t)
7531iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7532{
7533 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7534 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7535
7536 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7537 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7538
7539 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7540
7541    /* Zero the vector register first, then store the 64-bit value into the low 64 bits. */
7542 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
7543 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
7544 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7545
7546 /* Free but don't flush the source register. */
7547 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7548 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7549
7550 return off;
7551}
7552
7553
7554#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
7555 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
7556
7557/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
7558DECL_INLINE_THROW(uint32_t)
7559iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7560{
7561 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7562 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7563
7564 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7565 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7566
7567 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7568
7569 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
7570 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
7571 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
7572 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7573
7574 /* Free but don't flush the source register. */
7575 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7576 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7577
7578 return off;
7579}
7580
7581
7582#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
7583 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
7584
7585/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
7586DECL_INLINE_THROW(uint32_t)
7587iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst, uint8_t idxSrcVar, uint8_t iDwSrc)
7588{
7589 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7590 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
7591
7592 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7593 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7594
7595 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
7596
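    /* Copy the selected dword of the source value into the selected element of the destination
       register, bouncing it through the fixed temporary GPR. */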
7597 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
7598 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
7599 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7600
7601 /* Free but don't flush the destination register. */
7602 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7603 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
7604
7605 return off;
7606}
7607
7608
7609#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
7610 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
7611
7612/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
7613DECL_INLINE_THROW(uint32_t)
7614iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
7615{
7616 /*
7617     * The iYRegSrc == iYRegDst case needs to be treated differently here, because if iYRegDst gets allocated first for the full write,
7618     * it won't load the actual value from CPUMCTX. Allocating iYRegSrc afterwards would then duplicate the garbage already held in the
7619     * host register allocated for iYRegDst. This would be caught by the guest register value checking in debug builds.
7620 */
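    /* I.e. the result is: dst[127:0] = src[127:0]; dst[255:128] = 0. */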
7621 if (iYRegDst != iYRegSrc)
7622 {
7623 /* Allocate destination and source register. */
7624 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
7625 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7626 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
7627 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7628
7629 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
7630 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7631 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYRegDst);
7632 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYRegDst);
7633
7634 /* Free but don't flush the source and destination register. */
7635 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7636 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7637 }
7638 else
7639 {
7640        /* This effectively only clears the upper 128 bits of the register. */
7641 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
7642 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
7643
7644 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
7645 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYRegDst);
7646
7647 /* Free but don't flush the destination register. */
7648 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
7649 }
7650
7651 return off;
7652}
7653
7654
7655#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
7656 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
7657
7658/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
7659DECL_INLINE_THROW(uint32_t)
7660iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
7661{
7662 /*
7663     * The iYRegSrc == iYRegDst case needs to be treated differently here, because if iYRegDst gets allocated first for the full write,
7664     * it won't load the actual value from CPUMCTX. Allocating iYRegSrc afterwards would then duplicate the garbage already held in the
7665     * host register allocated for iYRegDst. This would be caught by the guest register value checking in debug builds.
7666     * The iYRegSrc == iYRegDst case would effectively only clear bits above 256 of a zmm register, which we don't support yet, so it is just a nop.
7667 */
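    /* I.e. for iYRegDst != iYRegSrc the result is a full 256-bit copy: dst[255:0] = src[255:0]. */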
7668 if (iYRegDst != iYRegSrc)
7669 {
7670 /* Allocate destination and source register. */
7671 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
7672 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
7673 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
7674 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7675
7676 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
7677 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYRegDst);
7678 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYRegDst);
7679
7680 /* Free but don't flush the source and destination register. */
7681 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7682 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7683 }
7684
7685 return off;
7686}
7687
7688
7689#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc) \
7690 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, 0)
7691
7692/** Emits code for IEM_MC_FETCH_YREG_U128. */
7693DECL_INLINE_THROW(uint32_t)
7694iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
7695{
7696 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7697 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
7698
7699 Assert(iDQWord <= 1);
7700 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7701 iDQWord == 1
7702 ? kIemNativeGstSimdRegLdStSz_High128
7703 : kIemNativeGstSimdRegLdStSz_Low128,
7704 kIemNativeGstRegUse_ReadOnly);
7705
7706 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7707 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
7708
7709 if (iDQWord == 1)
7710 AssertFailed(); /* Not used right now, implement and test when required. */
7711 else
7712 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
7713
7714 /* Free but don't flush the source register. */
7715 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7716 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
7717
7718 return off;
7719}
7720
7721
7722#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
7723 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
7724
7725/** Emits code for IEM_MC_FETCH_YREG_U64. */
7726DECL_INLINE_THROW(uint32_t)
7727iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
7728{
7729 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7730 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7731
7732 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7733 iQWord >= 2
7734 ? kIemNativeGstSimdRegLdStSz_High128
7735 : kIemNativeGstSimdRegLdStSz_Low128,
7736 kIemNativeGstRegUse_ReadOnly);
7737
7738 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7739 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7740
7741 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
7742
7743 /* Free but don't flush the source register. */
7744 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7745 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7746
7747 return off;
7748}
7749
7750
7751#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
7752 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
7753
7754/** Emits code for IEM_MC_FETCH_YREG_U32. */
7755DECL_INLINE_THROW(uint32_t)
7756iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
7757{
7758 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7759 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7760
7761 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7762 iDWord >= 4
7763 ? kIemNativeGstSimdRegLdStSz_High128
7764 : kIemNativeGstSimdRegLdStSz_Low128,
7765 kIemNativeGstRegUse_ReadOnly);
7766
7767 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7768 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7769
7770 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
7771
7772 /* Free but don't flush the source register. */
7773 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7774 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7775
7776 return off;
7777}
7778
7779
7780#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
7781 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
7782
7783/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
7784DECL_INLINE_THROW(uint32_t)
7785iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
7786{
7787 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7788 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
7789
7790 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
7791 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
7792
7793 /* Free but don't flush the register. */
7794 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
7795
7796 return off;
7797}
7798
7799
7800#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
7801 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
7802
7803/** Emits code for IEM_MC_STORE_YREG_U128. */
7804DECL_INLINE_THROW(uint32_t)
7805iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
7806{
7807 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7808 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
7809
7810 Assert(iDQword <= 1);
7811 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7812 iDQword == 0
7813 ? kIemNativeGstSimdRegLdStSz_Low128
7814 : kIemNativeGstSimdRegLdStSz_High128,
7815 kIemNativeGstRegUse_ForFullWrite);
7816
7817 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
7818
7819 if (iDQword == 0)
7820 {
7821 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
7822 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
7823 }
7824 else
7825 {
7826 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
7827 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
7828 }
7829
7830 /* Free but don't flush the source register. */
7831 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7832 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
7833
7834 return off;
7835}
7836
7837
7838#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
7839 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
7840
7841/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
7842DECL_INLINE_THROW(uint32_t)
7843iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
7844{
7845 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7846 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
7847
7848 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7849 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7850
7851 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
7852
7853 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
7854 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7855 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
7856 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
7857
7858 /* Free but don't flush the source register. */
7859 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7860 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
7861
7862 return off;
7863}
7864
7865
7866#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
7867 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
7868
7869/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
7870DECL_INLINE_THROW(uint32_t)
7871iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7872{
7873 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7874 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
7875
7876 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7877 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7878
7879 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7880
7881 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
7882 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7883 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7884 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iXReg);
7885
7886 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7887 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7888
7889 return off;
7890}
7891
7892
7893#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
7894 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
7895
7896/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
7897DECL_INLINE_THROW(uint32_t)
7898iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7899{
7900 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7901 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
7902
7903 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7904 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7905
7906 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7907
7908 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
7909 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7910 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7911 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iXReg);
7912
7913 /* Free but don't flush the source register. */
7914 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7915 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7916
7917 return off;
7918}
7919
7920
7921#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
7922 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
7923
7924/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
7925DECL_INLINE_THROW(uint32_t)
7926iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7927{
7928 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7929 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
7930
7931 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7932 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7933
7934 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7935
7936 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
7937 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7938 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7939 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iXReg);
7940
7941 /* Free but don't flush the source register. */
7942 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7943 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7944
7945 return off;
7946}
7947
7948
7949#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
7950 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
7951
7952/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
7953DECL_INLINE_THROW(uint32_t)
7954iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7955{
7956 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7957 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
7958
7959 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7960 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7961
7962 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7963
7964 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
7965 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7966 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7967 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iXReg);
7968
7969 /* Free but don't flush the source register. */
7970 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7971 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7972
7973 return off;
7974}
7975
7976
7977#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
7978 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
7979
7980/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
7981DECL_INLINE_THROW(uint32_t)
7982iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
7983{
7984 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7985 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
7986
7987 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7988 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7989
7990 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7991
7992 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
7993 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
7994 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
7995
7996 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7997 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7998
7999 return off;
8000}
8001
8002
8003#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
8004 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
8005
8006/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
8007DECL_INLINE_THROW(uint32_t)
8008iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8009{
8010 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8011 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
8012
8013 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8014 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8015
8016 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8017
8018 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8019 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
8020 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
8021
8022 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8023 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8024
8025 return off;
8026}
8027
8028
8029#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
8030 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
8031
8032/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
8033DECL_INLINE_THROW(uint32_t)
8034iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8035{
8036 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8037 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8038
8039 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8040 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8041
8042 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8043
8044 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8045 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
8046 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
8047
8048 /* Free but don't flush the source register. */
8049 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8050 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8051
8052 return off;
8053}
8054
8055
8056#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
8057 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
8058
8059/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
8060DECL_INLINE_THROW(uint32_t)
8061iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8062{
8063 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8064 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8065
8066 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8067 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8068
8069 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8070
8071 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8072 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
8073 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
8074
8075 /* Free but don't flush the source register. */
8076 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8077 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8078
8079 return off;
8080}
8081
8082
8083#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
8084 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
8085
8086/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
8087DECL_INLINE_THROW(uint32_t)
8088iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8089{
8090 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8091 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8092
8093 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8094 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8095
8096 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8097
8098 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
8099 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
8100 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
8101
8102 /* Free but don't flush the source register. */
8103 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8104 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8105
8106 return off;
8107}
8108
8109
8110#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
8111 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
8112
8113/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
8114DECL_INLINE_THROW(uint32_t)
8115iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8116{
8117 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8118 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8119
8120 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8121 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8122
8123 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8124
8125 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
8126 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
8127 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
8128 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
8129
8130 /* Free but don't flush the source register. */
8131 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8132 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8133
8134 return off;
8135}
8136
8137
8138#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
8139 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
8140
8141/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
8142DECL_INLINE_THROW(uint32_t)
8143iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8144{
8145 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8146 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8147
8148 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8149 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8150
8151 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8152
8153 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
8154 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
8155 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
8156 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
8157
8158 /* Free but don't flush the source register. */
8159 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8160 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8161
8162 return off;
8163}
8164
8165
8166#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
8167 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
8168
8169/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
8170DECL_INLINE_THROW(uint32_t)
8171iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
8172{
8173 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8174 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8175
8176 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8177 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8178 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
8179 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8180 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8181
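    /* Resulting value: dst[63:0] = u64Local, dst[127:64] = iYRegSrcHx[127:64], dst[255:128] = 0. */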
8182 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
8183 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
8184 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8185 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYRegDst);
8186 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYRegDst);
8187
8188 /* Free but don't flush the source and destination registers. */
8189 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
8190 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8191 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8192
8193 return off;
8194}
8195
8196
8197#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
8198 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
8199
8200/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
8201DECL_INLINE_THROW(uint32_t)
8202iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
8203{
8204 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8205 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8206
8207 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8208 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8209 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
8210 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8211 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8212
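    /* Resulting value: dst[63:0] = iYRegSrcHx[63:0], dst[127:64] = u64Local, dst[255:128] = 0. */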
8213 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
8214 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
8215 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8216 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYRegDst);
8217 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYRegDst);
8218
8219 /* Free but don't flush the source and destination registers. */
8220 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
8221 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8222 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8223
8224 return off;
8225}
8226
8227
8228#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
8229 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
8230
8231
8232/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
8233DECL_INLINE_THROW(uint32_t)
8234iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
8235{
8236 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8237 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8238
8239 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
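    /* A sketch of one such reduction (untested assumption): when bImm8Mask == 0xf, a single
     *     off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
     * would presumably replace the four element stores below. */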
8240 if (bImm8Mask & RT_BIT(0))
8241 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
8242 if (bImm8Mask & RT_BIT(1))
8243 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
8244 if (bImm8Mask & RT_BIT(2))
8245 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
8246 if (bImm8Mask & RT_BIT(3))
8247 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
8248 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
8249
8250 /* Free but don't flush the destination register. */
8251 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8252
8253 return off;
8254}
8255
8256
8257#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
8258 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
8259
8260
8261/** Emits code for IEM_MC_FETCH_YREG_U256. */
8262DECL_INLINE_THROW(uint32_t)
8263iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
8264{
8265 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8266 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
8267
8268 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
8269 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
8270 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8271
8272 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
8273
8274 /* Free but don't flush the source register. */
8275 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8276 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8277
8278 return off;
8279}
8280
8281
8282#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
8283 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
8284
8285
8286/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX. */
8287DECL_INLINE_THROW(uint32_t)
8288iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
8289{
8290 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8291 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
8292
8293 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8294 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8295    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
8296
8297 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
8298 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYRegDst);
8299 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYRegDst);
8300
8301 /* Free but don't flush the source register. */
8302 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8303 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8304
8305 return off;
8306}
8307
8308
8309#define IEM_MC_SSE_UPDATE_MXCSR(a_fMxcsr) \
8310 off = iemNativeEmitSimdSseUpdateMxcsr(pReNative, off, a_fMxcsr)
8311
8312/** Emits code for IEM_MC_SSE_UPDATE_MXCSR. */
8313DECL_INLINE_THROW(uint32_t)
8314iemNativeEmitSimdSseUpdateMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxMxCsrVar)
8315{
8316 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxMxCsrVar);
8317 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxMxCsrVar, sizeof(uint32_t));
8318
8319 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ForUpdate);
8320    uint8_t const idxVarRegMxCsr = iemNativeVarRegisterAcquire(pReNative, idxMxCsrVar, &off, true /*fInitialized*/);
8321 uint8_t const idxVarRegTmp = iemNativeRegAllocTmp(pReNative, &off);
8322
8323 /** @todo r=aeichner I think it would be safe to spare the temporary register and trash
8324 * the variable MXCSR register as it isn't used afterwards in the microcode block anyway.
8325 * Needs verification though, so play it safe for now.
8326 */
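    /* A sketch of that alternative (untested assumption, see the todo above):
     *     off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarRegMxCsr, X86_MXCSR_XCPT_FLAGS);
     *     off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxVarRegMxCsr);
     * This would make idxVarRegTmp superfluous. */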
8327 /* mov tmp, varmxcsr */
8328 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarRegTmp, idxVarRegMxCsr);
8329 /* and tmp, X86_MXCSR_XCPT_FLAGS */
8330 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarRegTmp, X86_MXCSR_XCPT_FLAGS);
8331 /* or mxcsr, tmp */
8332 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxVarRegTmp);
8333
8334    /* Write back the MXCSR register value (there is no delayed writeback for such registers at the moment). */
8335 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
8336
8337 /* Free but don't flush the MXCSR register. */
8338 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
8339 iemNativeVarRegisterRelease(pReNative, idxMxCsrVar);
8340 iemNativeRegFreeTmp(pReNative, idxVarRegTmp);
8341
8342 return off;
8343}
8344
8345
8346#define IEM_MC_STORE_SSE_RESULT(a_SseData, a_iXmmReg) \
8347 off = iemNativeEmitSimdSseStoreResult(pReNative, off, a_SseData, a_iXmmReg)
8348
8349/** Emits code for IEM_MC_STORE_SSE_RESULT. */
8350DECL_INLINE_THROW(uint32_t)
8351iemNativeEmitSimdSseStoreResult(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSseDataVar, uint8_t iXReg)
8352{
8353 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSseDataVar);
8354 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSseDataVar, sizeof(IEMSSERESULT));
8355
8356    /** @todo r=aeichner We probably need to rework this MC statement and the users to make things more efficient. */
8357 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8358 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
8359 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ForUpdate);
8360 uint8_t const idxVarRegResAddr = iemNativeRegAllocTmp(pReNative, &off);
8361 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
8362
8363    /* Assume the register is always dirty for now, even though it won't actually get written if an unmasked exception is raised when the code is executed. */
8364 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
8365
8366 off = iemNativeEmitLoadArgGregWithVarAddr(pReNative, off, idxVarRegResAddr, idxSseDataVar, false /*fFlushShadows*/);
8367
8368 /* Update MXCSR. */
8369 off = iemNativeEmitLoadGprByGprU32(pReNative, off, idxRegTmp, idxVarRegResAddr, RT_UOFFSETOF_DYN(IEMSSERESULT, MXCSR));
8370 /* tmp &= X86_MXCSR_XCPT_FLAGS. */
8371 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS);
8372 /* mxcsr |= tmp */
8373 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
8374
8375    /* Write back the MXCSR register value (there is no delayed writeback for such registers at the moment). */
8376 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
8377
8378 /* Update the value if there is no unmasked exception. */
8379 /* tmp = mxcsr */
8380 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
8381 /* tmp &= X86_MXCSR_XCPT_MASK */
8382 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
8383 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
8384 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
8385 /* tmp = ~tmp */
8386 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
8387 /* tmp &= mxcsr */
8388 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
8389
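    /* At this point tmp = mxcsr & ~(exception mask bits shifted down to the flag positions); the test
       below therefore checks whether any exception flag is set whose mask bit is clear, i.e. whether an
       unmasked exception is pending. If so, the jump skips the result store; the branch target is fixed
       up right after the store. */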
8390 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS);
8391 uint32_t offFixup = off;
8392 off = iemNativeEmitJnzToFixed(pReNative, off, off);
8393 AssertCompileMemberSize(IEMSSERESULT, uResult, sizeof(RTFLOAT128U));
8394 off = iemNativeEmitLoadVecRegByGprU128(pReNative, off, idxSimdRegDst, idxVarRegResAddr, RT_UOFFSETOF_DYN(IEMSSERESULT, uResult));
8395 iemNativeFixupFixedJump(pReNative, offFixup, off);
8396
8397 /* Free but don't flush the shadowed register. */
8398 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8399 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
8400 iemNativeRegFreeTmp(pReNative, idxVarRegResAddr);
8401 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8402
8403 return off;
8404}
8405
8406
8407/*********************************************************************************************************************************
8408* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
8409*********************************************************************************************************************************/
8410
8411/**
8412 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX.
8413 */
8414DECL_INLINE_THROW(uint32_t)
8415iemNativeEmitCallSseAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
8416{
8417 /*
8418 * Need to do the FPU preparation.
8419 */
8420 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
8421
8422 /*
8423 * Do all the call setup and cleanup.
8424 */
8425 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS);
8426
8427 /*
8428 * Load the XState::x87 pointer.
8429 */
8430 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, kIemNativeGstRegRef_X87, 0 /*idxRegInClass*/);
8431
8432 /*
8433 * Make the call.
8434 */
8435 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
8436
8437 return off;
8438}
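/*
 * Usage sketch (argument names are illustrative only, not taken from this file): a microcode block
 * declares its visible arguments and then invokes e.g.
 *     IEM_MC_CALL_SSE_AIMPL_2(a_pfnWorker, pSseRes, puSrc);
 * The hidden first argument - the pointer to XState::x87 loaded above - is supplied by this common
 * worker, so the visible arguments start at index IEM_SSE_AIMPL_HIDDEN_ARGS.
 */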
8439
8440
8441#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
8442 off = iemNativeEmitCallSseAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
8443
8444/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
8445DECL_INLINE_THROW(uint32_t)
8446iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
8447{
8448 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8449 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8450 return iemNativeEmitCallSseAImplCommon(pReNative, off, pfnAImpl, 2);
8451}
8452
8453
8454#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
8455 off = iemNativeEmitCallSseAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
8456
8457/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
8458DECL_INLINE_THROW(uint32_t)
8459iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8460{
8461 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8462 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8463 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8464 return iemNativeEmitCallSseAImplCommon(pReNative, off, pfnAImpl, 3);
8465}
8466
8467
8468/*********************************************************************************************************************************
8469* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
8470*********************************************************************************************************************************/
8471
8472/**
8473 * Common worker for IEM_MC_CALL_AVX_AIMPL_XXX.
8474 */
8475DECL_INLINE_THROW(uint32_t)
8476iemNativeEmitCallAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
8477{
8478 /*
8479 * Need to do the FPU preparation.
8480 */
8481 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
8482
8483 /*
8484 * Do all the call setup and cleanup.
8485 */
8486 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_AVX_AIMPL_HIDDEN_ARGS, IEM_AVX_AIMPL_HIDDEN_ARGS);
8487
8488 /*
8489 * Load the XState pointer.
8490 */
8491 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, kIemNativeGstRegRef_XState, 0 /*idxRegInClass*/);
8492
8493 /*
8494 * Make the call.
8495 */
8496 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
8497
8498 return off;
8499}
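/*
 * Note: unlike the SSE worker above, which hands the helper a pointer to XState::x87, this one passes
 * a pointer to the whole XState as the hidden first argument, presumably because AVX helpers also need
 * the YMM high halves kept outside the legacy x87/SSE save area.
 */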
8500
8501
8502#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
8503 off = iemNativeEmitCallAvxAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
8504
8505/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
8506DECL_INLINE_THROW(uint32_t)
8507iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
8508{
8509 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8510 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8511 return iemNativeEmitCallAvxAImplCommon(pReNative, off, pfnAImpl, 2);
8512}
8513
8514
8515#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
8516 off = iemNativeEmitCallAvxAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
8517
8518/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
8519DECL_INLINE_THROW(uint32_t)
8520iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8521{
8522 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8523 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8524 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8525 return iemNativeEmitCallAvxAImplCommon(pReNative, off, pfnAImpl, 3);
8526}
8527#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
8528
8529
8530/*********************************************************************************************************************************
8531* Include instruction emitters. *
8532*********************************************************************************************************************************/
8533#include "target-x86/IEMAllN8veEmit-x86.h"
8534