VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h @ 103888

Last change on this file since 103888 was 103881, checked in by vboxsync, 9 months ago

VMM/IEM: Fix IEM_MC_CALL_SSE_AIMPL_2() and IEM_MC_CALL_SSE_AIMPL_3() (introduced with r162236), bugref:10614

1/* $Id: IEMAllN8veRecompFuncs.h 103881 2024-03-18 08:45:06Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused configs.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81
82
83/*********************************************************************************************************************************
84* Code emitters for flushing pending guest register writes and sanity checks *
85*********************************************************************************************************************************/
86
87#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
88# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
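/**
 * Debug helper for IEMNATIVE_WITH_DELAYED_PC_UPDATING (only with
 * IEMNATIVE_REG_FIXED_PC_DBG): emits code that copies the fixed debug PC
 * register into a temporary, adds the accumulated Core.offPc delta and checks
 * the result against the RIP value in CPUMCTX, so a stale delayed-PC state is
 * caught at runtime.
 */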
89DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
90{
91 /* Compare the shadow with the context value; they should match. */
92 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
93 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
94 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
95 return off;
96}
97# endif
98#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
99
100/**
101 * Flushes delayed write of a specific guest register.
102 *
103 * This must be called prior to calling CImpl functions and any helpers that use
104 * the guest state (like raising exceptions) and such.
105 *
106 * This optimization has not yet been implemented. The first target would be
107 * RIP updates, since these are the most common ones.
108 */
109DECL_INLINE_THROW(uint32_t)
110iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
111{
112#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
113 /* If, for whatever reason, it is possible to reference the PC register at some point, we need to do the writeback here first. */
114#endif
115
116#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
117 if ( enmClass == kIemNativeGstRegRef_XReg
118 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
119 {
120 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
121 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
122 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
123
124 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
125 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
126 }
127#endif
128 RT_NOREF(pReNative, enmClass, idxReg);
129 return off;
130}
131
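/*
 * Note! Illustrative usage sketch, not upstream code: a caller about to invoke
 *       a CImpl helper that takes a reference to XMM3 would flush it like this
 *       (enum and function names as used above):
 *
 *           off = iemNativeRegFlushPendingSpecificWrite(pReNative, off,
 *                                                       kIemNativeGstRegRef_XReg, 3);
 *
 *       For the other register classes this is currently a no-op, pending the
 *       delayed-RIP-update optimization mentioned in the function comment.
 */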
132
133
134/*********************************************************************************************************************************
135* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
136*********************************************************************************************************************************/
137
138#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
139 { \
140 Assert(pReNative->Core.bmVars == 0); \
141 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
142 Assert(pReNative->Core.bmStack == 0); \
143 pReNative->fMc = (a_fMcFlags); \
144 pReNative->fCImpl = (a_fCImplFlags); \
145 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
146
147/** We have to get to the end in recompilation mode, as otherwise we won't
148 * generate code for all the IEM_MC_IF_XXX branches. */
149#define IEM_MC_END() \
150 iemNativeVarFreeAll(pReNative); \
151 } return off
152
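/*
 * Note! Illustrative sketch, not upstream code: IEM_MC_BEGIN opens a C block
 *       and IEM_MC_END closes it with '} return off', so the pair only works
 *       inside an emitter function that returns the new code buffer offset and
 *       has pReNative, off and pCallEntry in scope, roughly like this:
 *
 *           IEM_MC_BEGIN(0, 0, 0, 0);
 *           IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(2, VINF_SUCCESS);
 *           IEM_MC_END();
 */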
153
154
155/*********************************************************************************************************************************
156* Native Emitter Support. *
157*********************************************************************************************************************************/
158
159#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
160
161#define IEM_MC_NATIVE_ELSE() } else {
162
163#define IEM_MC_NATIVE_ENDIF() } ((void)0)
164
165
166#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
167 off = a_fnEmitter(pReNative, off)
168
169#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
170 off = a_fnEmitter(pReNative, off, (a0))
171
172#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
173 off = a_fnEmitter(pReNative, off, (a0), (a1))
174
175#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
176 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
177
178#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
179 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
180
181#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
182 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
183
184#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
185 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
186
187#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
188 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
189
190#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
192
193
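/*
 * Note! Illustrative sketch, not upstream code: the brackets above are meant to
 *       be used like this, where the RT_ARCH_VAL_XXX mask selects the host
 *       architectures the custom emitter supports and iemNativeEmit_SomeOp is a
 *       placeholder for a real emitter function:
 *
 *           IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
 *               IEM_MC_NATIVE_EMIT_2(iemNativeEmit_SomeOp, idxVarDst, idxVarSrc);
 *           IEM_MC_NATIVE_ELSE()
 *               ... generic IEM_MC_XXX fallback statements ...
 *           IEM_MC_NATIVE_ENDIF();
 *
 *       Since RT_ARCH_VAL is a compile-time constant for the host build, the
 *       compiler drops whichever branch doesn't apply.
 */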
194
195/*********************************************************************************************************************************
196* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
197*********************************************************************************************************************************/
198
199#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
200 pReNative->fMc = 0; \
201 pReNative->fCImpl = (a_fFlags); \
202 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
203
204
205#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
206 pReNative->fMc = 0; \
207 pReNative->fCImpl = (a_fFlags); \
208 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
209
210DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
211 uint8_t idxInstr, uint64_t a_fGstShwFlush,
212 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
213{
214 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
215}
216
217
218#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
219 pReNative->fMc = 0; \
220 pReNative->fCImpl = (a_fFlags); \
221 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
222 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
223
224DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
225 uint8_t idxInstr, uint64_t a_fGstShwFlush,
226 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
227{
228 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
229}
230
231
232#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
233 pReNative->fMc = 0; \
234 pReNative->fCImpl = (a_fFlags); \
235 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
236 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
237
238DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
239 uint8_t idxInstr, uint64_t a_fGstShwFlush,
240 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
241 uint64_t uArg2)
242{
243 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
244}
245
246
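/*
 * Note! Illustrative sketch, not upstream code (the flag, flush mask and helper
 *       names are placeholders): a deferral such as
 *
 *           IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(cbInstr, IEM_CIMPL_F_STATUS_FLAGS,
 *                                                RT_BIT_64(kIemNativeGstReg_EFlags),
 *                                                iemCImpl_SomeHelper, u8Imm);
 *
 *       clears pReNative->fMc, records the CImpl flags and ends the emitter by
 *       returning iemNativeEmitCImplCall1(), i.e. the whole instruction is
 *       handled by a call to the C implementation.
 */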
247
248/*********************************************************************************************************************************
249* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
250*********************************************************************************************************************************/
251
252/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
253 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
254DECL_INLINE_THROW(uint32_t)
255iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
256{
257 /*
258 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
259 * return with a special status code and make the execution loop deal with
260 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
261 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
262 * could continue w/o interruption, it probably will drop into the
263 * debugger, so it is not worth the effort of trying to service it here and we
264 * just lump it in with the handling of the others.
265 *
266 * To simplify the code and the register state management even more (wrt
267 * immediate in AND operation), we always update the flags and skip the
268 * extra check and its associated conditional jump.
269 */
270 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
271 <= UINT32_MAX);
272#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
273 AssertMsg( pReNative->idxCurCall == 0
274 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
275 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
276#endif
277
278 /*
279 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label
280 * any pending register writes must be flushed.
281 */
282 off = iemNativeRegFlushPendingWrites(pReNative, off);
283
284 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
285 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
286 true /*fSkipLivenessAssert*/);
287 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
288 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
289 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
290 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
291 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
292
293 /* Free but don't flush the EFLAGS register. */
294 iemNativeRegFreeTmp(pReNative, idxEflReg);
295
296 return off;
297}
298
299
300/** Emits the a_rcNormal handling: nothing for VINF_SUCCESS, a jump to the ReturnBreak label for VINF_IEM_REEXEC_BREAK. */
301template<int const a_rcNormal>
302DECL_FORCE_INLINE(uint32_t)
303iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
304{
305 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
306 if (a_rcNormal != VINF_SUCCESS)
307 {
308#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
309 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
310#else
311 RT_NOREF_PV(idxInstr);
312#endif
313
314 /* As this code returns from the TB any pending register writes must be flushed. */
315 off = iemNativeRegFlushPendingWrites(pReNative, off);
316
317 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
318 }
319 return off;
320}
321
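/*
 * Note! The a_rcNormal template argument above is a compile-time constant, so
 *       an instantiation with VINF_SUCCESS emits no code at all, while the
 *       VINF_IEM_REEXEC_BREAK instantiation flushes pending writes and jumps to
 *       the ReturnBreak label to leave the TB.
 */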
322
323#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
324 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
325 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
326
327#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
328 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
329 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
330 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
331
332/** Same as iemRegAddToRip64AndFinishingNoFlags. */
333DECL_INLINE_THROW(uint32_t)
334iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
335{
336#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
337# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
338 if (!pReNative->Core.offPc)
339 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
340# endif
341
342 /* Allocate a temporary PC register. */
343 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
344
345 /* Perform the addition and store the result. */
346 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
347 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
348
349 /* Free but don't flush the PC register. */
350 iemNativeRegFreeTmp(pReNative, idxPcReg);
351#endif
352
353#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
354 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
355
356 pReNative->Core.offPc += cbInstr;
357# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
358 off = iemNativePcAdjustCheck(pReNative, off);
359# endif
360 if (pReNative->cCondDepth)
361 off = iemNativeEmitPcWriteback(pReNative, off);
362 else
363 pReNative->Core.cInstrPcUpdateSkipped++;
364#endif
365
366 return off;
367}
368
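/*
 * Note! Summary of the two build modes above: without
 *       IEMNATIVE_WITH_DELAYED_PC_UPDATING each instruction gets an explicit
 *       add of cbInstr to the PC register plus a store to CPUMCTX.rip; with it,
 *       cbInstr is merely accumulated in pReNative->Core.offPc and the store is
 *       emitted later, immediately only when inside a conditional block
 *       (cCondDepth != 0).
 */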
369
370#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
371 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
372 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
373
374#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
375 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
376 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
377 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
378
379/** Same as iemRegAddToEip32AndFinishingNoFlags. */
380DECL_INLINE_THROW(uint32_t)
381iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
382{
383#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
384# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
385 if (!pReNative->Core.offPc)
386 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
387# endif
388
389 /* Allocate a temporary PC register. */
390 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
391
392 /* Perform the addition and store the result. */
393 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
394 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
395
396 /* Free but don't flush the PC register. */
397 iemNativeRegFreeTmp(pReNative, idxPcReg);
398#endif
399
400#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
401 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
402
403 pReNative->Core.offPc += cbInstr;
404# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
405 off = iemNativePcAdjustCheck(pReNative, off);
406# endif
407 if (pReNative->cCondDepth)
408 off = iemNativeEmitPcWriteback(pReNative, off);
409 else
410 pReNative->Core.cInstrPcUpdateSkipped++;
411#endif
412
413 return off;
414}
415
416
417#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
418 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
419 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
420
421#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
422 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
423 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
424 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
425
426/** Same as iemRegAddToIp16AndFinishingNoFlags. */
427DECL_INLINE_THROW(uint32_t)
428iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
429{
430#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
431# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
432 if (!pReNative->Core.offPc)
433 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
434# endif
435
436 /* Allocate a temporary PC register. */
437 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
438
439 /* Perform the addition and store the result. */
440 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
441 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
442 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
443
444 /* Free but don't flush the PC register. */
445 iemNativeRegFreeTmp(pReNative, idxPcReg);
446#endif
447
448#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
449 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
450
451 pReNative->Core.offPc += cbInstr;
452# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
453 off = iemNativePcAdjustCheck(pReNative, off);
454# endif
455 if (pReNative->cCondDepth)
456 off = iemNativeEmitPcWriteback(pReNative, off);
457 else
458 pReNative->Core.cInstrPcUpdateSkipped++;
459#endif
460
461 return off;
462}
463
464
465
466/*********************************************************************************************************************************
467* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
468*********************************************************************************************************************************/
469
470#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
471 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
472 (a_enmEffOpSize), pCallEntry->idxInstr); \
473 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
474
475#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
476 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
477 (a_enmEffOpSize), pCallEntry->idxInstr); \
478 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
479 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
480
481#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
482 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
483 IEMMODE_16BIT, pCallEntry->idxInstr); \
484 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
485
486#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
487 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
488 IEMMODE_16BIT, pCallEntry->idxInstr); \
489 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
490 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
491
492#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
493 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
494 IEMMODE_64BIT, pCallEntry->idxInstr); \
495 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
496
497#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
498 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
499 IEMMODE_64BIT, pCallEntry->idxInstr); \
500 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
501 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
502
503/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
504 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
505 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
506DECL_INLINE_THROW(uint32_t)
507iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
508 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
509{
510 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
511
512 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
513 off = iemNativeRegFlushPendingWrites(pReNative, off);
514
515#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
516 Assert(pReNative->Core.offPc == 0);
517
518 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
519#endif
520
521 /* Allocate a temporary PC register. */
522 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
523
524 /* Perform the addition. */
525 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
526
527 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
528 {
529 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
530 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
531 }
532 else
533 {
534 /* Just truncate the result to 16-bit IP. */
535 Assert(enmEffOpSize == IEMMODE_16BIT);
536 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
537 }
538 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
539
540 /* Free but don't flush the PC register. */
541 iemNativeRegFreeTmp(pReNative, idxPcReg);
542
543 return off;
544}
545
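/*
 * Note! Worked example (illustrative): a 2-byte 'jmp -2' (jump to self) with
 *       64-bit operand size makes the code above add (int64_t)-2 + 2 = 0 to the
 *       PC register, verify that the result is canonical and store it back;
 *       with a 16-bit operand size the sum is instead truncated to the low 16
 *       bits before the store.
 */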
546
547#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
548 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
549 (a_enmEffOpSize), pCallEntry->idxInstr); \
550 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
551
552#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
553 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
554 (a_enmEffOpSize), pCallEntry->idxInstr); \
555 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
556 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
557
558#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
559 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
560 IEMMODE_16BIT, pCallEntry->idxInstr); \
561 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
562
563#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
564 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
565 IEMMODE_16BIT, pCallEntry->idxInstr); \
566 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
567 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
568
569#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
570 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
571 IEMMODE_32BIT, pCallEntry->idxInstr); \
572 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
573
574#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
575 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
576 IEMMODE_32BIT, pCallEntry->idxInstr); \
577 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
578 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
579
580/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
581 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
582 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
583DECL_INLINE_THROW(uint32_t)
584iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
585 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
586{
587 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
588
589 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
590 off = iemNativeRegFlushPendingWrites(pReNative, off);
591
592#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
593 Assert(pReNative->Core.offPc == 0);
594
595 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
596#endif
597
598 /* Allocate a temporary PC register. */
599 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
600
601 /* Perform the addition. */
602 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
603
604 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
605 if (enmEffOpSize == IEMMODE_16BIT)
606 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
607
608 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
609/** @todo we can skip this in 32-bit FLAT mode. */
610 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
611
612 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
613
614 /* Free but don't flush the PC register. */
615 iemNativeRegFreeTmp(pReNative, idxPcReg);
616
617 return off;
618}
619
620
621#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
622 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
623 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
624
625#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
626 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
627 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
628 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
629
630#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
631 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
632 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
633
634#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
635 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
636 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
637 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
638
639#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
640 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
641 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
642
643#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
644 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
645 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
646 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
647
648/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
649DECL_INLINE_THROW(uint32_t)
650iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
651 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
652{
653 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
654 off = iemNativeRegFlushPendingWrites(pReNative, off);
655
656#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
657 Assert(pReNative->Core.offPc == 0);
658
659 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
660#endif
661
662 /* Allocate a temporary PC register. */
663 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
664
665 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
666 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
667 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
668 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
669 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
670
671 /* Free but don't flush the PC register. */
672 iemNativeRegFreeTmp(pReNative, idxPcReg);
673
674 return off;
675}
676
677
678
679/*********************************************************************************************************************************
680* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
681*********************************************************************************************************************************/
682
683/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
684#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
685 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
686
687/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
688#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
689 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
690
691/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
692#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
693 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
694
695/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
696 * clears flags. */
697#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
698 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
699 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
700
701/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
702 * clears flags. */
703#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
704 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
705 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
706
707/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
708 * clears flags. */
709#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
710 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
711 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
712
713#undef IEM_MC_SET_RIP_U16_AND_FINISH
714
715
716/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
717#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
718 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
719
720/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
721#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
722 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
723
724/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
725 * clears flags. */
726#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
727 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
728 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
729
730/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
731 * and clears flags. */
732#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
733 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
734 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
735
736#undef IEM_MC_SET_RIP_U32_AND_FINISH
737
738
739/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
740#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
741 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
742
743/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
744 * and clears flags. */
745#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
746 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
747 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
748
749#undef IEM_MC_SET_RIP_U64_AND_FINISH
750
751
752/** Same as iemRegRipJumpU16AndFinishNoFlags,
753 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
754DECL_INLINE_THROW(uint32_t)
755iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
756 uint8_t idxInstr, uint8_t cbVar)
757{
758 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
759 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
760
761 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
762 off = iemNativeRegFlushPendingWrites(pReNative, off);
763
764#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
765 Assert(pReNative->Core.offPc == 0);
766
767 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
768#endif
769
770 /* Get a register with the new PC loaded from idxVarPc.
771 Note! This ASSUMES that the high bits of the GPR are zeroed. */
772 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
773
774 /* Check limit (may #GP(0) + exit TB). */
775 if (!f64Bit)
776/** @todo we can skip this test in FLAT 32-bit mode. */
777 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
778 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
779 else if (cbVar > sizeof(uint32_t))
780 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
781
782 /* Store the result. */
783 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
784
785 iemNativeVarRegisterRelease(pReNative, idxVarPc);
786 /** @todo implicitly free the variable? */
787
788 return off;
789}
790
791
792
793/*********************************************************************************************************************************
794* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
795*********************************************************************************************************************************/
796
797#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
798 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
799
800/**
801 * Emits code to check if a \#NM exception should be raised.
802 *
803 * @returns New code buffer offset, UINT32_MAX on failure.
804 * @param pReNative The native recompile state.
805 * @param off The code buffer offset.
806 * @param idxInstr The current instruction.
807 */
808DECL_INLINE_THROW(uint32_t)
809iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
810{
811#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
812 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
813
814 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
815 {
816#endif
817 /*
818 * Make sure we don't have any outstanding guest register writes as we may
819 * raise an #NM and all guest registers must be up to date in CPUMCTX.
820 */
821 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
822 off = iemNativeRegFlushPendingWrites(pReNative, off);
823
824#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
825 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
826#else
827 RT_NOREF(idxInstr);
828#endif
829
830 /* Allocate a temporary CR0 register. */
831 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
832 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
833
834 /*
835 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
836 * return raisexcpt();
837 */
838 /* Test and jump. */
839 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
840
841 /* Free but don't flush the CR0 register. */
842 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
843
844#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
845 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
846 }
847 else
848 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
849#endif
850
851 return off;
852}
853
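/*
 * Note! The test emitted above is the native counterpart of the interpreted
 *       check 'if (cr0 & (X86_CR0_EM | X86_CR0_TS)) raise #NM'. With the SIMD
 *       register allocator enabled, fSimdRaiseXcptChecksEmitted prevents the
 *       same check from being emitted again while the flag stays set; the
 *       'Omitted' counter above tracks those skipped checks.
 */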
854
855#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
856 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
857
858/**
859 * Emits code to check if a \#MF exception should be raised.
860 *
861 * @returns New code buffer offset, UINT32_MAX on failure.
862 * @param pReNative The native recompile state.
863 * @param off The code buffer offset.
864 * @param idxInstr The current instruction.
865 */
866DECL_INLINE_THROW(uint32_t)
867iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
868{
869 /*
870 * Make sure we don't have any outstanding guest register writes as we may
871 * raise an #MF and all guest registers must be up to date in CPUMCTX.
872 */
873 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
874 off = iemNativeRegFlushPendingWrites(pReNative, off);
875
876#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
877 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
878#else
879 RT_NOREF(idxInstr);
880#endif
881
882 /* Allocate a temporary FSW register. */
883 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
884 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
885
886 /*
887 * if ((FSW & X86_FSW_ES) != 0)
888 * return raisexcpt();
889 */
890 /* Test and jump. */
891 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFpuFswReg, X86_FSW_ES, idxLabelRaiseMf);
892
893 /* Free but don't flush the FSW register. */
894 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
895
896 return off;
897}
898
899
900#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
901 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
902
903/**
904 * Emits code to check if a SSE exception (either \#UD or \#NM) should be raised.
905 *
906 * @returns New code buffer offset, UINT32_MAX on failure.
907 * @param pReNative The native recompile state.
908 * @param off The code buffer offset.
909 * @param idxInstr The current instruction.
910 */
911DECL_INLINE_THROW(uint32_t)
912iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
913{
914#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
915 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
916
917 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
918 {
919#endif
920 /*
921 * Make sure we don't have any outstanding guest register writes as we may
922 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
923 */
924 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
925 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
926
927#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
928 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
929#else
930 RT_NOREF(idxInstr);
931#endif
932
933 /* Allocate a temporary CR0 and CR4 register. */
934 uint8_t const idxLabelRaiseSseRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseRelated);
935 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
936 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
937 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
938
939 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
940#ifdef RT_ARCH_AMD64
941 /*
942 * We do a modified test here:
943 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
944 * else { goto RaiseSseRelated; }
945 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
946 * all targets except the 386, which doesn't support SSE, so this should
947 * be a safe assumption.
948 */
949 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
950 //pCodeBuf[off++] = 0xcc;
951 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
952 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
953 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
954 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
955 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
956 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseSseRelated, kIemNativeInstrCond_ne);
957
958#elif defined(RT_ARCH_ARM64)
959 /*
960 * We do a modified test here:
961 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
962 * else { goto RaiseSseRelated; }
963 */
964 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
965 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
966 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
967 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
968 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
969 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
970 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
971 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
972 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
973 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
974 idxLabelRaiseSseRelated);
975
976#else
977# error "Port me!"
978#endif
979
980 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
981 iemNativeRegFreeTmp(pReNative, idxTmpReg);
982 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
983 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
984
985#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
986 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
987 }
988 else
989 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
990#endif
991
992 return off;
993}
994
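/*
 * Note! Why the AMD64 trick above works: after masking with
 *       (X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR), the value
 *       ((cr4 & X86_CR4_OSFXSR) | cr0) XORed with X86_CR4_OSFXSR is zero
 *       exactly when OSFXSR=1, EM=0 and TS=0, the only case where no exception
 *       is needed, so a single jne/jnz to the RaiseSseRelated label covers both
 *       the #UD and the #NM condition.
 */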
995
996#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
997 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
998
999/**
1000 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
1001 *
1002 * @returns New code buffer offset, UINT32_MAX on failure.
1003 * @param pReNative The native recompile state.
1004 * @param off The code buffer offset.
1005 * @param idxInstr The current instruction.
1006 */
1007DECL_INLINE_THROW(uint32_t)
1008iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1009{
1010#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1011 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
1012
1013 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
1014 {
1015#endif
1016 /*
1017 * Make sure we don't have any outstanding guest register writes as we may
1018 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
1019 */
1020 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
1021 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
1022
1023#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1024 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1025#else
1026 RT_NOREF(idxInstr);
1027#endif
1028
1029 /* Allocate a temporary CR0, CR4 and XCR0 register. */
1030 uint8_t const idxLabelRaiseAvxRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseAvxRelated);
1031 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
1032 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
1033 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
1034 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1035
1036 /*
1037 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
1038 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
1039 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
1040 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
1041 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
1042 * { likely }
1043 * else { goto RaiseAvxRelated; }
1044 */
1045#ifdef RT_ARCH_AMD64
1046 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
1047 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
1048 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
1049 ^ 0x1a) ) { likely }
1050 else { goto RaiseAvxRelated; } */
1051 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
1052 //pCodeBuf[off++] = 0xcc;
1053 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
1054 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
1055 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
1056 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1057 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
1058 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
1059 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1060 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
1061 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
1062 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
1063 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseAvxRelated, kIemNativeInstrCond_ne);
1064
1065#elif defined(RT_ARCH_ARM64)
1066 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
1067 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
1068 else { goto RaiseAvxRelated; } */
1069 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
1070 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
1071 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
1072 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
1073 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
1074 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
1075 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
1076 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
1077 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
1078 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
1079 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
1080 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
1081 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
1082 idxLabelRaiseAvxRelated);
1083
1084#else
1085# error "Port me!"
1086#endif
1087
1088 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1089 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1090 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1091 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
1092#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1093 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
1094 }
1095 else
1096 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
1097#endif
1098
1099 return off;
1100}
1101
1102
1103#define IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
1104 off = iemNativeEmitRaiseSseAvxSimdFpXcpt(pReNative, off, pCallEntry->idxInstr)
1105
1106/**
1107 * Emits code to raise either a SIMD floating point exception (\#XF) or \#UD.
1108 *
1109 * @returns New code buffer offset, UINT32_MAX on failure.
1110 * @param pReNative The native recompile state.
1111 * @param off The code buffer offset.
1112 * @param idxInstr The current instruction.
1113 */
1114DECL_INLINE_THROW(uint32_t)
1115iemNativeEmitRaiseSseAvxSimdFpXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1116{
1117 /*
1118 * Make sure we don't have any outstanding guest register writes as we may
1119 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
1120 */
1121 off = iemNativeRegFlushPendingWrites(pReNative, off);
1122
1123#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1124 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1125#else
1126 RT_NOREF(idxInstr);
1127#endif
1128
1129 /* Allocate a temporary CR4 register. */
1130 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
1131 uint8_t const idxLabelRaiseXf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseXf);
1132 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
1133
1134 /*
1135 * if (!(cr4 & X86_CR4_OSXMMEEXCPT))
1136 * return raisexcpt();
1137 */
1138 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXMMEEXCPT_BIT, idxLabelRaiseXf);
1139
1140 /* raise \#UD exception unconditionally. */
1141 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseUd);
1142
1143 /* Free but don't flush the CR4 register. */
1144 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1145
1146 return off;
1147}
1148
1149
1150#define IEM_MC_RAISE_DIVIDE_ERROR() \
1151 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
1152
1153/**
1154 * Emits code to raise a \#DE.
1155 *
1156 * @returns New code buffer offset, UINT32_MAX on failure.
1157 * @param pReNative The native recompile state.
1158 * @param off The code buffer offset.
1159 * @param idxInstr The current instruction.
1160 */
1161DECL_INLINE_THROW(uint32_t)
1162iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1163{
1164 /*
1165 * Make sure we don't have any outstanding guest register writes as we may raise a \#DE; all guest registers must be up to date in CPUMCTX.
1166 */
1167 off = iemNativeRegFlushPendingWrites(pReNative, off);
1168
1169#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1170 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1171#else
1172 RT_NOREF(idxInstr);
1173#endif
1174
1175 uint8_t const idxLabelRaiseDe = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseDe);
1176
1177 /* raise \#DE exception unconditionally. */
1178 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseDe);
1179
1180 return off;
1181}
1182
1183
1184/*********************************************************************************************************************************
1185* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
1186*********************************************************************************************************************************/
1187
1188/**
1189 * Pushes an IEM_MC_IF_XXX onto the condition stack.
1190 *
1191 * @returns Pointer to the condition stack entry on success, NULL on failure
1192 * (too many nestings)
1193 */
1194DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
1195{
1196#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1197 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
1198#endif
1199
1200 uint32_t const idxStack = pReNative->cCondDepth;
1201 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
1202
1203 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
1204 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
1205
1206 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
1207 pEntry->fInElse = false;
1208 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
1209 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
1210
1211 return pEntry;
1212}
1213
1214
1215/**
1216 * Start of the if-block, snapshotting the register and variable state.
1217 */
1218DECL_INLINE_THROW(void)
1219iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
1220{
1221 Assert(offIfBlock != UINT32_MAX);
1222 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1223 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1224 Assert(!pEntry->fInElse);
1225
1226 /* Define the start of the IF block if requested or for disassembly purposes. */
1227 if (idxLabelIf != UINT32_MAX)
1228 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
1229#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1230 else
1231 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
1232#else
1233 RT_NOREF(offIfBlock);
1234#endif
1235
1236#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1237 Assert(pReNative->Core.offPc == 0);
1238#endif
1239
1240 /* Copy the initial state so we can restore it in the 'else' block. */
1241 pEntry->InitialState = pReNative->Core;
1242}
1243
1244
1245#define IEM_MC_ELSE() } while (0); \
1246 off = iemNativeEmitElse(pReNative, off); \
1247 do {
1248
1249/** Emits code related to IEM_MC_ELSE. */
1250DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1251{
1252 /* Check sanity and get the conditional stack entry. */
1253 Assert(off != UINT32_MAX);
1254 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1255 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1256 Assert(!pEntry->fInElse);
1257
1258 /* Jump to the endif */
1259 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
1260
1261 /* Define the else label and enter the else part of the condition. */
1262 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1263 pEntry->fInElse = true;
1264
1265#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1266 Assert(pReNative->Core.offPc == 0);
1267#endif
1268
1269 /* Snapshot the core state so we can do a merge at the endif and restore
1270 the snapshot we took at the start of the if-block. */
1271 pEntry->IfFinalState = pReNative->Core;
1272 pReNative->Core = pEntry->InitialState;
1273
1274 return off;
1275}
1276
1277
1278#define IEM_MC_ENDIF() } while (0); \
1279 off = iemNativeEmitEndIf(pReNative, off)
1280
1281/** Emits code related to IEM_MC_ENDIF. */
1282DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1283{
1284 /* Check sanity and get the conditional stack entry. */
1285 Assert(off != UINT32_MAX);
1286 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1287 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1288
1289#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1290 Assert(pReNative->Core.offPc == 0);
1291#endif
1292
1293 /*
1294 * Now we have to find common ground with the core state at the end of the
1295 * if-block. Use the smallest common denominator and just drop anything
1296 * that isn't the same in both states.
1297 */
1298 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
1299 * which is why we're doing this at the end of the else-block.
1300 * But we'd need more info about the future for that to be worth the effort. */
1301 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
1302 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
1303 {
1304 /* shadow guest stuff first. */
1305 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
1306 if (fGstRegs)
1307 {
1308 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
1309 do
1310 {
1311 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
1312 fGstRegs &= ~RT_BIT_64(idxGstReg);
1313
1314 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
1315 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
1316 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
1317 {
1318 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
1319 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
1320 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
1321 }
1322 } while (fGstRegs);
1323 }
1324 else
1325 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
1326
1327 /* Check variables next. For now we must require them to be identical
1328 or stuff we can recreate. */
1329 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
1330 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
1331 if (fVars)
1332 {
1333 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
1334 do
1335 {
1336 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
1337 fVars &= ~RT_BIT_32(idxVar);
1338
1339 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
1340 {
1341 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
1342 continue;
1343 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
1344 {
1345 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1346 if (idxHstReg != UINT8_MAX)
1347 {
1348 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1349 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1350 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
1351 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1352 }
1353 continue;
1354 }
1355 }
1356 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
1357 continue;
1358
1359 /* Irreconcilable, so drop it. */
1360 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1361 if (idxHstReg != UINT8_MAX)
1362 {
1363 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1364 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1365 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
1366 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1367 }
1368 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1369 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
1370 } while (fVars);
1371 }
1372
1373 /* Finally, check that the host register allocations matches. */
1374 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
1375 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
1376 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
1377 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
1378 }
1379
1380 /*
1381 * Define the endif label and maybe the else one if we're still in the 'if' part.
1382 */
1383 if (!pEntry->fInElse)
1384 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1385 else
1386 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
1387 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
1388
1389 /* Pop the conditional stack. */
1390 pReNative->cCondDepth -= 1;
1391
1392 return off;
1393}
1394
1395
1396#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
1397 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
1398 do {
1399
1400/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
1401DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1402{
1403 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1404 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1405
1406 /* Get the eflags. */
1407 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1408 kIemNativeGstRegUse_ReadOnly);
1409
1410 /* Test and jump. */
1411 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1412
1413 /* Free but don't flush the EFlags register. */
1414 iemNativeRegFreeTmp(pReNative, idxEflReg);
1415
1416 /* Make a copy of the core state now as we start the if-block. */
1417 iemNativeCondStartIfBlock(pReNative, off);
1418
1419 return off;
1420}
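
/*
 * Illustrative sketch (not compiled): how the IEM_MC_IF_XXX / IEM_MC_ELSE / IEM_MC_ENDIF
 * macros bracket a recompiled block.  Each IF pushes a condition stack entry carrying an
 * 'else' and an 'endif' label; ELSE and ENDIF define those labels and reconcile the state.
 */
#if 0
    off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, X86_EFL_CF); do {
        /* ... emitters for the 'taken' side ... */
    } while (0); off = iemNativeEmitElse(pReNative, off); do {
        /* ... emitters for the 'not taken' side ... */
    } while (0); off = iemNativeEmitEndIf(pReNative, off);
#endif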
1421
1422
1423#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
1424 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
1425 do {
1426
1427/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
1428DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1429{
1430 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1431 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1432
1433 /* Get the eflags. */
1434 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1435 kIemNativeGstRegUse_ReadOnly);
1436
1437 /* Test and jump. */
1438 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1439
1440 /* Free but don't flush the EFlags register. */
1441 iemNativeRegFreeTmp(pReNative, idxEflReg);
1442
1443 /* Make a copy of the core state now as we start the if-block. */
1444 iemNativeCondStartIfBlock(pReNative, off);
1445
1446 return off;
1447}
1448
1449
1450#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
1451 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
1452 do {
1453
1454/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
1455DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1456{
1457 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1458 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1459
1460 /* Get the eflags. */
1461 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1462 kIemNativeGstRegUse_ReadOnly);
1463
1464 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1465 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1466
1467 /* Test and jump. */
1468 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1469
1470 /* Free but don't flush the EFlags register. */
1471 iemNativeRegFreeTmp(pReNative, idxEflReg);
1472
1473 /* Make a copy of the core state now as we start the if-block. */
1474 iemNativeCondStartIfBlock(pReNative, off);
1475
1476 return off;
1477}
1478
1479
1480#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
1481 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
1482 do {
1483
1484/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
1485DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1486{
1487 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1488 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1489
1490 /* Get the eflags. */
1491 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1492 kIemNativeGstRegUse_ReadOnly);
1493
1494 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1495 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1496
1497 /* Test and jump. */
1498 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1499
1500 /* Free but don't flush the EFlags register. */
1501 iemNativeRegFreeTmp(pReNative, idxEflReg);
1502
1503 /* Make a copy of the core state now as we start the if-block. */
1504 iemNativeCondStartIfBlock(pReNative, off);
1505
1506 return off;
1507}
1508
1509
1510#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
1511 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
1512 do {
1513
1514#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
1515 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
1516 do {
1517
1518/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
1519DECL_INLINE_THROW(uint32_t)
1520iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1521 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1522{
1523 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
1524 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1525
1526 /* Get the eflags. */
1527 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1528 kIemNativeGstRegUse_ReadOnly);
1529
1530 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1531 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1532
1533 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1534 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1535 Assert(iBitNo1 != iBitNo2);
1536
1537#ifdef RT_ARCH_AMD64
1538 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
1539
1540 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1541 if (iBitNo1 > iBitNo2)
1542 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1543 else
1544 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1545 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1546
1547#elif defined(RT_ARCH_ARM64)
1548 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1549 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1550
1551 /* and tmpreg, eflreg, #1<<iBitNo1 */
1552 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1553
1554 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1555 if (iBitNo1 > iBitNo2)
1556 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1557 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1558 else
1559 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1560 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1561
1562 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1563
1564#else
1565# error "Port me"
1566#endif
1567
1568 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1569 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1570 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1571
1572 /* Free but don't flush the EFlags and tmp registers. */
1573 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1574 iemNativeRegFreeTmp(pReNative, idxEflReg);
1575
1576 /* Make a copy of the core state now as we start the if-block. */
1577 iemNativeCondStartIfBlock(pReNative, off);
1578
1579 return off;
1580}
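
/*
 * Illustrative sketch (not compiled): scalar model of the isolate/shift/xor trick used
 * above, returning true when the two EFLAGS bits differ (tested at bit position iBitNo2).
 */
#if 0
static bool iemExampleEflBitsDiffer(uint32_t fEfl, unsigned iBitNo1, unsigned iBitNo2) /* hypothetical helper */
{
    uint32_t uTmp = fEfl & RT_BIT_32(iBitNo1);                      /* isolate bit 1 */
    uTmp = iBitNo1 > iBitNo2 ? uTmp >> (iBitNo1 - iBitNo2)          /* align it with bit 2 */
                             : uTmp << (iBitNo2 - iBitNo1);
    uTmp ^= fEfl;                                                   /* bit 2 now holds bit1 ^ bit2 */
    return RT_BOOL(uTmp & RT_BIT_32(iBitNo2));
}
#endif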
1581
1582
1583#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
1584 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
1585 do {
1586
1587#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
1588 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
1589 do {
1590
1591/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
1592 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
1593DECL_INLINE_THROW(uint32_t)
1594iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
1595 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1596{
1597 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
1598 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1599
1600 /* We need an if-block label for the inverted variant. */
1601 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
1602 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
1603
1604 /* Get the eflags. */
1605 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1606 kIemNativeGstRegUse_ReadOnly);
1607
1608 /* Translate the flag masks to bit numbers. */
1609 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1610 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1611
1612 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1613 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1614 Assert(iBitNo1 != iBitNo);
1615
1616 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1617 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1618 Assert(iBitNo2 != iBitNo);
1619 Assert(iBitNo2 != iBitNo1);
1620
1621#ifdef RT_ARCH_AMD64
1622 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
1623#elif defined(RT_ARCH_ARM64)
1624 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1625#endif
1626
1627 /* Check for the lone bit first. */
1628 if (!fInverted)
1629 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1630 else
1631 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
1632
1633 /* Then extract and compare the other two bits. */
1634#ifdef RT_ARCH_AMD64
1635 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1636 if (iBitNo1 > iBitNo2)
1637 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1638 else
1639 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1640 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1641
1642#elif defined(RT_ARCH_ARM64)
1643 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1644
1645 /* and tmpreg, eflreg, #1<<iBitNo1 */
1646 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1647
1648 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1649 if (iBitNo1 > iBitNo2)
1650 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1651 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1652 else
1653 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1654 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1655
1656 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1657
1658#else
1659# error "Port me"
1660#endif
1661
1662 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1663 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1664 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1665
1666 /* Free but don't flush the EFlags and tmp registers. */
1667 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1668 iemNativeRegFreeTmp(pReNative, idxEflReg);
1669
1670 /* Make a copy of the core state now as we start the if-block. */
1671 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
1672
1673 return off;
1674}
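
/*
 * Illustrative sketch (not compiled): scalar model of the composite condition above.
 * With a_fBit=X86_EFL_ZF, a_fBit1=X86_EFL_SF and a_fBit2=X86_EFL_OF this matches the
 * usual 'greater' style test (ZF clear and SF==OF); the inverted variant is its negation.
 */
#if 0
static bool iemExampleEflBitClearAndBitsEqual(uint32_t fEfl, uint32_t fBit, unsigned iBitNo1, unsigned iBitNo2)
{
    bool const fLoneBitSet = RT_BOOL(fEfl & fBit);
    bool const fBitsEqual  = ((fEfl >> iBitNo1) & 1) == ((fEfl >> iBitNo2) & 1);
    return !fLoneBitSet && fBitsEqual;
}
#endif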
1675
1676
1677#define IEM_MC_IF_CX_IS_NZ() \
1678 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
1679 do {
1680
1681/** Emits code for IEM_MC_IF_CX_IS_NZ. */
1682DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1683{
1684 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1685
1686 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1687 kIemNativeGstRegUse_ReadOnly);
1688 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
1689 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1690
1691 iemNativeCondStartIfBlock(pReNative, off);
1692 return off;
1693}
1694
1695
1696#define IEM_MC_IF_ECX_IS_NZ() \
1697 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
1698 do {
1699
1700#define IEM_MC_IF_RCX_IS_NZ() \
1701 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
1702 do {
1703
1704/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
1705DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1706{
1707 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1708
1709 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1710 kIemNativeGstRegUse_ReadOnly);
1711 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
1712 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1713
1714 iemNativeCondStartIfBlock(pReNative, off);
1715 return off;
1716}
1717
1718
1719#define IEM_MC_IF_CX_IS_NOT_ONE() \
1720 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
1721 do {
1722
1723/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
1724DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1725{
1726 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1727
1728 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1729 kIemNativeGstRegUse_ReadOnly);
1730#ifdef RT_ARCH_AMD64
1731 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1732#else
1733 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1734 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1735 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1736#endif
1737 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1738
1739 iemNativeCondStartIfBlock(pReNative, off);
1740 return off;
1741}
1742
1743
1744#define IEM_MC_IF_ECX_IS_NOT_ONE() \
1745 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
1746 do {
1747
1748#define IEM_MC_IF_RCX_IS_NOT_ONE() \
1749 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
1750 do {
1751
1752/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
1753DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1754{
1755 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1756
1757 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1758 kIemNativeGstRegUse_ReadOnly);
1759 if (f64Bit)
1760 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1761 else
1762 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1763 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1764
1765 iemNativeCondStartIfBlock(pReNative, off);
1766 return off;
1767}
1768
1769
1770#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1771 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
1772 do {
1773
1774#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1775 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
1776 do {
1777
1778/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
1779 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
1780DECL_INLINE_THROW(uint32_t)
1781iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
1782{
1783 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1784 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1785
1786 /* We have to load both RCX and EFLAGS before we can start branching,
1787 otherwise we'll end up in the else-block with an inconsistent
1788 register allocator state.
1789 Doing EFLAGS first as it's more likely to be loaded, right? */
1790 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1791 kIemNativeGstRegUse_ReadOnly);
1792 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1793 kIemNativeGstRegUse_ReadOnly);
1794
1795 /** @todo we could reduce this to a single branch instruction by spending a
1796 * temporary register and some setnz stuff. Not sure if loops are
1797 * worth it. */
1798 /* Check CX. */
1799#ifdef RT_ARCH_AMD64
1800 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1801#else
1802 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1803 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1804 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1805#endif
1806
1807 /* Check the EFlags bit. */
1808 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1809 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1810 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
1811 !fCheckIfSet /*fJmpIfSet*/);
1812
1813 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1814 iemNativeRegFreeTmp(pReNative, idxEflReg);
1815
1816 iemNativeCondStartIfBlock(pReNative, off);
1817 return off;
1818}
1819
1820
1821#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1822 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
1823 do {
1824
1825#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1826 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
1827 do {
1828
1829#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1830 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
1831 do {
1832
1833#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1834 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
1835 do {
1836
1837/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
1838 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
1839 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
1840 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
1841DECL_INLINE_THROW(uint32_t)
1842iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1843 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
1844{
1845 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1846 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1847
1848 /* We have to load both RCX and EFLAGS before we can start branching,
1849 otherwise we'll end up in the else-block with an inconsistent
1850 register allocator state.
1851 Doing EFLAGS first as it's more likely to be loaded, right? */
1852 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1853 kIemNativeGstRegUse_ReadOnly);
1854 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1855 kIemNativeGstRegUse_ReadOnly);
1856
1857 /** @todo we could reduce this to a single branch instruction by spending a
1858 * temporary register and some setnz stuff. Not sure if loops are
1859 * worth it. */
1860 /* Check RCX/ECX. */
1861 if (f64Bit)
1862 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1863 else
1864 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1865
1866 /* Check the EFlags bit. */
1867 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1868 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1869 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
1870 !fCheckIfSet /*fJmpIfSet*/);
1871
1872 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1873 iemNativeRegFreeTmp(pReNative, idxEflReg);
1874
1875 iemNativeCondStartIfBlock(pReNative, off);
1876 return off;
1877}
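
/*
 * Illustrative sketch (not compiled): scalar model of the combined counter/flag check
 * above, roughly what LOOPE/LOOPNE style flows test before updating the counter;
 * fCheckIfSet picks the 'bit set' vs 'bit clear' flavour and f64Bit picks RCX vs ECX.
 */
#if 0
static bool iemExampleRcxNotOneAndEflBit(uint64_t uRcx, uint32_t fEfl, uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
{
    uint64_t const uCounter = f64Bit ? uRcx : (uint32_t)uRcx;
    bool const     fBit     = RT_BOOL(fEfl & fBitInEfl);
    return uCounter != 1 && (fCheckIfSet ? fBit : !fBit);
}
#endif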
1878
1879
1880#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
1881 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
1882 do {
1883
1884/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
1885DECL_INLINE_THROW(uint32_t)
1886iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
1887{
1888 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1889
1890 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
1891 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
1892 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
1893 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
1894
1895 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
1896
1897 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
1898
1899 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
1900
1901 iemNativeCondStartIfBlock(pReNative, off);
1902 return off;
1903}
1904
1905
1906#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1907
1908#define IEM_MC_IF_MXCSR_XCPT_PENDING() \
1909 off = iemNativeEmitIfMxcsrXcptPending(pReNative, off); \
1910 do {
1911
1912/** Emits code for IEM_MC_IF_MXCSR_XCPT_PENDING. */
1913DECL_INLINE_THROW(uint32_t)
1914iemNativeEmitIfMxcsrXcptPending(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1915{
1916 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1917
1918 uint8_t const idxGstMxcsrReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
1919 kIemNativeGstRegUse_Calculation);
1920 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
1921
1922 /* mov tmp0, mxcsr */
1923 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp, idxGstMxcsrReg);
1924 /* tmp0 &= X86_MXCSR_XCPT_FLAGS */
1925 off = iemNativeEmitAndGprByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS);
1926 /* mxcsr &= X86_MXCSR_XCPT_MASK */
1927 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstMxcsrReg, X86_MXCSR_XCPT_MASK);
1928 /* mxcsr = ~mxcsr */
1929 off = iemNativeEmitInvBitsGpr(pReNative, off, idxGstMxcsrReg, idxGstMxcsrReg);
1930 /* mxcsr >>= X86_MXCSR_XCPT_MASK_SHIFT */
1931 off = iemNativeEmitShiftGprRight(pReNative, off, idxGstMxcsrReg, X86_MXCSR_XCPT_MASK_SHIFT);
1932 /* tmp0 &= mxcsr */
1933 off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegTmp, idxGstMxcsrReg);
1934
1935 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxRegTmp, true /*f64Bit*/, pEntry->idxLabelElse);
1936 iemNativeRegFreeTmp(pReNative, idxGstMxcsrReg);
1937 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1938
1939 iemNativeCondStartIfBlock(pReNative, off);
1940 return off;
1941}
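
/*
 * Illustrative sketch (not compiled): scalar model of the MXCSR test emitted above.
 * An exception is pending when an exception flag (IE..PE) is set while the matching
 * mask bit (IM..PM) is clear.
 */
#if 0
static bool iemExampleMxcsrXcptPending(uint32_t fMxcsr)
{
    uint32_t const fXcptFlags = fMxcsr & X86_MXCSR_XCPT_FLAGS;
    uint32_t const fUnmasked  = ~(fMxcsr & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT;
    return (fXcptFlags & fUnmasked) != 0;
}
#endif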
1942
1943#endif
1944
1945
1946/*********************************************************************************************************************************
1947* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
1948*********************************************************************************************************************************/
1949
1950#define IEM_MC_NOREF(a_Name) \
1951 RT_NOREF_PV(a_Name)
1952
1953#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
1954 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
1955
1956#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
1957 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
1958
1959#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
1960 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
1961
1962#define IEM_MC_LOCAL(a_Type, a_Name) \
1963 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
1964
1965#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
1966 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
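
/*
 * Illustrative sketch (not compiled): how the macros above look when used in a recompiled
 * MC block body.  Each declared name is just a packed variable index into
 * pReNative->Core.aVars rather than real storage.
 */
#if 0
    IEM_MC_LOCAL(uint32_t, u32Value);                        /* iemNativeVarAlloc(pReNative, sizeof(uint32_t)) */
    IEM_MC_ARG(uint16_t, u16Src, 0);                         /* argument slot 0 */
    IEM_MC_ARG_LOCAL_REF(uint32_t *, pu32Dst, u32Value, 1);  /* argument slot 1, referencing the local above */
    /* ... emit a call, then ... */
    IEM_MC_FREE_LOCAL(u32Value);
#endif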
1967
1968
1969/**
1970 * Sets the host register for @a idxVar to @a idxReg.
1971 *
1972 * The register must not be allocated. Any guest register shadowing will be
1973 * implicitly dropped by this call.
1974 *
1975 * The variable must not have any register associated with it (causes
1976 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
1977 * implied.
1978 *
1979 * @returns idxReg
1980 * @param pReNative The recompiler state.
1981 * @param idxVar The variable.
1982 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
1983 * @param off For recording in debug info.
1984 *
1985 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
1986 */
1987DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
1988{
1989 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
1990 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
1991 Assert(!pVar->fRegAcquired);
1992 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
1993 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
1994 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
1995
1996 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
1997 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
1998
1999 iemNativeVarSetKindToStack(pReNative, idxVar);
2000 pVar->idxReg = idxReg;
2001
2002 return idxReg;
2003}
2004
2005
2006/**
2007 * A convenient helper function.
2008 */
2009DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
2010 uint8_t idxReg, uint32_t *poff)
2011{
2012 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
2013 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
2014 return idxReg;
2015}
2016
2017
2018/**
2019 * This is called by IEM_MC_END() to clean up all variables.
2020 */
2021DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
2022{
2023 uint32_t const bmVars = pReNative->Core.bmVars;
2024 if (bmVars != 0)
2025 iemNativeVarFreeAllSlow(pReNative, bmVars);
2026 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
2027 Assert(pReNative->Core.bmStack == 0);
2028}
2029
2030
2031#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
2032
2033/**
2034 * This is called by IEM_MC_FREE_LOCAL.
2035 */
2036DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2037{
2038 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2039 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
2040 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2041}
2042
2043
2044#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
2045
2046/**
2047 * This is called by IEM_MC_FREE_ARG.
2048 */
2049DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2050{
2051 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2052 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
2053 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2054}
2055
2056
2057#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
2058
2059/**
2060 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
2061 */
2062DECL_INLINE_THROW(uint32_t)
2063iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
2064{
2065 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
2066 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
2067 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2068 Assert( pVarDst->cbVar == sizeof(uint16_t)
2069 || pVarDst->cbVar == sizeof(uint32_t));
2070
2071 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
2072 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
2073 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
2074 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
2075 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2076
2077 Assert(pVarDst->cbVar < pVarSrc->cbVar);
2078
2079 /*
2080 * Special case for immediates.
2081 */
2082 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
2083 {
2084 switch (pVarDst->cbVar)
2085 {
2086 case sizeof(uint16_t):
2087 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
2088 break;
2089 case sizeof(uint32_t):
2090 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
2091 break;
2092 default: AssertFailed(); break;
2093 }
2094 }
2095 else
2096 {
2097 /*
2098 * The generic solution for now.
2099 */
2100 /** @todo optimize this by having the python script make sure the source
2101 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
2102 * statement. Then we could just transfer the register assignments. */
2103 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
2104 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
2105 switch (pVarDst->cbVar)
2106 {
2107 case sizeof(uint16_t):
2108 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
2109 break;
2110 case sizeof(uint32_t):
2111 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
2112 break;
2113 default: AssertFailed(); break;
2114 }
2115 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
2116 iemNativeVarRegisterRelease(pReNative, idxVarDst);
2117 }
2118 return off;
2119}
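
/*
 * Illustrative sketch (not compiled): the plain C effect IEM_MC_ASSIGN_TO_SMALLER models,
 * here assuming a 32-bit source and a 16-bit destination: a truncating assignment that the
 * emitter above performs either at compile time (immediates) or via a 16/32-bit
 * register-to-register load.
 */
#if 0
    uint32_t const u32Src = UINT32_C(0x12345678);
    uint16_t       u16Dst = (uint16_t)u32Src;   /* 0x5678 */
#endif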
2120
2121
2122
2123/*********************************************************************************************************************************
2124* Emitters for IEM_MC_CALL_CIMPL_XXX *
2125*********************************************************************************************************************************/
2126
2127/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
2128DECL_INLINE_THROW(uint32_t)
2129iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
2130 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
2131
2132{
2133 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
2134
2135#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2136 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
2137 when a call clobbers any of the relevant control registers. */
2138# if 1
2139 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
2140 {
2141 /* Likely as long as call+ret are done via cimpl. */
2142 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
2143 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
2144 }
2145 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
2146 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2147 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
2148 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2149 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
2150 else
2151 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2152 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2153 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2154
2155# else
2156 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
2157 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2158 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
2159 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2160 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
2161 || pfnCImpl == (uintptr_t)iemCImpl_callf
2162 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
2163 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
2164 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2165 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2166 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2167# endif
2168#endif
2169
2170 /*
2171 * Do all the call setup and cleanup.
2172 */
2173 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
2174
2175 /*
2176 * Load the two or three hidden arguments.
2177 */
2178#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2179 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
2180 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2181 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
2182#else
2183 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2184 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
2185#endif
2186
2187 /*
2188 * Make the call and check the return code.
2189 *
2190 * Shadow PC copies are always flushed here, other stuff depends on flags.
2191 * Segment and general purpose registers are explicitly flushed via the
2192 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
2193 * macros.
2194 */
2195 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
2196#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2197 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
2198#endif
2199 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
2200 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
2201 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
2202 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
2203
2204 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
2205}
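
/*
 * Illustrative sketch (not compiled): the shape of the call set up above.  Apart from the
 * optional VBOXSTRICTRC shadow argument on Windows/AMD64, each CImpl worker receives pVCpu
 * and the instruction length as hidden leading arguments before the MC-level ones.
 */
#if 0
    VBOXSTRICTRC rcStrict = pfnCImpl(pVCpu, cbInstr /*, a0, a1, ... */); /* conceptual; the real call is emitted as native code */
#endif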
2206
2207
2208#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
2209 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
2210
2211/** Emits code for IEM_MC_CALL_CIMPL_1. */
2212DECL_INLINE_THROW(uint32_t)
2213iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2214 uintptr_t pfnCImpl, uint8_t idxArg0)
2215{
2216 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2217 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
2218}
2219
2220
2221#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
2222 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
2223
2224/** Emits code for IEM_MC_CALL_CIMPL_2. */
2225DECL_INLINE_THROW(uint32_t)
2226iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2227 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
2228{
2229 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2230 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2231 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
2232}
2233
2234
2235#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
2236 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2237 (uintptr_t)a_pfnCImpl, a0, a1, a2)
2238
2239/** Emits code for IEM_MC_CALL_CIMPL_3. */
2240DECL_INLINE_THROW(uint32_t)
2241iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2242 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2243{
2244 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2245 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2246 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2247 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
2248}
2249
2250
2251#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
2252 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2253 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
2254
2255/** Emits code for IEM_MC_CALL_CIMPL_4. */
2256DECL_INLINE_THROW(uint32_t)
2257iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2258 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2259{
2260 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2261 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2262 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2263 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2264 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
2265}
2266
2267
2268#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
2269 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2270 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
2271
2272/** Emits code for IEM_MC_CALL_CIMPL_5. */
2273DECL_INLINE_THROW(uint32_t)
2274iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2275 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
2276{
2277 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2278 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2279 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2280 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2281 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
2282 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
2283}
2284
2285
2286/** Recompiler debugging: Flush guest register shadow copies. */
2287#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
2288
2289
2290
2291/*********************************************************************************************************************************
2292* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
2293*********************************************************************************************************************************/
2294
2295/**
2296 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
2297 */
2298DECL_INLINE_THROW(uint32_t)
2299iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2300 uintptr_t pfnAImpl, uint8_t cArgs)
2301{
2302 if (idxVarRc != UINT8_MAX)
2303 {
2304 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
2305 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
2306 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
2307 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
2308 }
2309
2310 /*
2311 * Do all the call setup and cleanup.
2312 */
2313 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
2314
2315 /*
2316 * Make the call and update the return code variable if we've got one.
2317 */
2318 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
2319 if (idxVarRc != UINT8_MAX)
2320 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
2321
2322 return off;
2323}
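
/*
 * Illustrative sketch (not compiled): the effect IEM_MC_CALL_AIMPL_2 and friends model.
 * The helper is called directly with the argument variables and, when a return variable
 * was given, its value is taken from IEMNATIVE_CALL_RET_GREG via iemNativeVarRegisterSet
 * above.  The names below are hypothetical.
 */
#if 0
    uint32_t const fEFlagsOut = pfnSomeAImplWorker(uArg0, uArg1);
#endif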
2324
2325
2326
2327#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
2328 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
2329
2330#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
2331 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
2332
2333/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
2334DECL_INLINE_THROW(uint32_t)
2335iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
2336{
2337 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
2338}
2339
2340
2341#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
2342 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
2343
2344#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
2345 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
2346
2347/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
2348DECL_INLINE_THROW(uint32_t)
2349iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
2350{
2351 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2352 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
2353}
2354
2355
2356#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
2357 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
2358
2359#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
2360 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
2361
2362/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
2363DECL_INLINE_THROW(uint32_t)
2364iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2365 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
2366{
2367 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2368 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2369 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
2370}
2371
2372
2373#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
2374 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
2375
2376#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
2377 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
2378
2379/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
2380DECL_INLINE_THROW(uint32_t)
2381iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2382 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2383{
2384 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2385 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2386 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2387 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
2388}
2389
2390
2391#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
2392 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2393
2394#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
2395 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2396
2397/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
2398DECL_INLINE_THROW(uint32_t)
2399iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2400 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2401{
2402 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2403 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2404 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2405 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
2406 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
2407}
2408
2409
2410
2411/*********************************************************************************************************************************
2412* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
2413*********************************************************************************************************************************/
2414
2415#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
2416 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
2417
2418#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2419 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
2420
2421#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2422 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
2423
2424#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2425 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
2426
2427
2428/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
2429 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
2430DECL_INLINE_THROW(uint32_t)
2431iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
2432{
2433 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2434 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2435 Assert(iGRegEx < 20);
2436
2437 /* Same discussion as in iemNativeEmitFetchGregU16 */
2438 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2439 kIemNativeGstRegUse_ReadOnly);
2440
2441 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2442 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2443
2444 /* The value is zero-extended to the full 64-bit host register width. */
2445 if (iGRegEx < 16)
2446 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2447 else
2448 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2449
2450 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2451 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2452 return off;
2453}
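
/*
 * Illustrative sketch (not compiled): the iGRegEx encoding handled above.  Values 0..15
 * select the low byte of GPR0..15, while 16..19 select the legacy high-byte registers
 * AH/CH/DH/BH of GPR0..3, hence the 'iGRegEx & 15' indexing and the Gpr8Hi load.
 */
#if 0
static uint8_t iemExampleFetchGReg8(uint64_t const *pauGRegs /* 16 entries */, uint8_t iGRegEx)
{
    uint64_t const uFull = pauGRegs[iGRegEx & 15];
    return iGRegEx < 16 ? (uint8_t)uFull : (uint8_t)(uFull >> 8);
}
#endif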
2454
2455
2456#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2457 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
2458
2459#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2460 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
2461
2462#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2463 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
2464
2465/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
2466DECL_INLINE_THROW(uint32_t)
2467iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
2468{
2469 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2470 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2471 Assert(iGRegEx < 20);
2472
2473 /* Same discussion as in iemNativeEmitFetchGregU16 */
2474 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2475 kIemNativeGstRegUse_ReadOnly);
2476
2477 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2478 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2479
2480 if (iGRegEx < 16)
2481 {
2482 switch (cbSignExtended)
2483 {
2484 case sizeof(uint16_t):
2485 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2486 break;
2487 case sizeof(uint32_t):
2488 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2489 break;
2490 case sizeof(uint64_t):
2491 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2492 break;
2493 default: AssertFailed(); break;
2494 }
2495 }
2496 else
2497 {
2498 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2499 switch (cbSignExtended)
2500 {
2501 case sizeof(uint16_t):
2502 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2503 break;
2504 case sizeof(uint32_t):
2505 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2506 break;
2507 case sizeof(uint64_t):
2508 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2509 break;
2510 default: AssertFailed(); break;
2511 }
2512 }
2513
2514 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2515 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2516 return off;
2517}
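/* Note on the iGRegEx >= 16 path above: the high byte (bits 15:8) is first
 * moved down into the variable's host register, and the sign extension is
 * then performed from that register onto itself - hence the second operand
 * being idxVarReg rather than idxGstFullReg in the second switch. */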
2518
2519
2520
2521#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
2522 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
2523
2524#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
2525 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2526
2527#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
2528 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2529
2530/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
2531DECL_INLINE_THROW(uint32_t)
2532iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2533{
2534 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2535 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2536 Assert(iGReg < 16);
2537
2538 /*
2539 * We can either just load the low 16 bits of the GPR into a host register
2540 * for the variable, or we can do so via a shadow copy host register. The
2541 * latter will avoid having to reload it if it's being stored later, but
2542 * will waste a host register if it isn't touched again. Since we don't
2543 * know what's going to happen, we choose the latter for now.
2544 */
2545 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2546 kIemNativeGstRegUse_ReadOnly);
2547
2548 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2549 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2550 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2551 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2552
2553 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2554 return off;
2555}
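/* For reference, the non-shadowing alternative discussed in the comment above
 * would presumably boil down to a single direct load from the guest context,
 * roughly (sketch only, using the helper and offset pattern seen elsewhere in
 * this file):
 *
 *      off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg,
 *                                            RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
 *
 * at the cost of always hitting memory if the value is needed again. */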
2556
2557
2558#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
2559 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2560
2561#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
2562 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2563
2564/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
2565DECL_INLINE_THROW(uint32_t)
2566iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
2567{
2568 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2569 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2570 Assert(iGReg < 16);
2571
2572 /*
2573 * We can either just load the low 16 bits of the GPR into a host register
2574 * for the variable, or we can do so via a shadow copy host register. The
2575 * latter will avoid having to reload it if it's being stored later, but
2576 * will waste a host register if it isn't touched again. Since we don't
2577 * know what's going to happen, we choose the latter for now.
2578 */
2579 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2580 kIemNativeGstRegUse_ReadOnly);
2581
2582 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2583 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2584 if (cbSignExtended == sizeof(uint32_t))
2585 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2586 else
2587 {
2588 Assert(cbSignExtended == sizeof(uint64_t));
2589 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2590 }
2591 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2592
2593 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2594 return off;
2595}
2596
2597
2598#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
2599 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
2600
2601#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
2602 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
2603
2604/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
2605DECL_INLINE_THROW(uint32_t)
2606iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2607{
2608 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2609 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2610 Assert(iGReg < 16);
2611
2612 /*
2613 * We can either just load the low 32 bits of the GPR into a host register
2614 * for the variable, or we can do so via a shadow copy host register. The
2615 * latter will avoid having to reload it if it's being stored later, but
2616 * will waste a host register if it isn't touched again. Since we don't
2617 * know what's going to happen, we choose the latter for now.
2618 */
2619 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2620 kIemNativeGstRegUse_ReadOnly);
2621
2622 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2623 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2624 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2625 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2626
2627 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2628 return off;
2629}
2630
2631
2632#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
2633 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
2634
2635/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
2636DECL_INLINE_THROW(uint32_t)
2637iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2638{
2639 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2640 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2641 Assert(iGReg < 16);
2642
2643 /*
2644 * We can either just load the low 32 bits of the GPR into a host register
2645 * for the variable, or we can do so via a shadow copy host register. The
2646 * latter will avoid having to reload it if it's being stored later, but
2647 * will waste a host register if it isn't touched again. Since we don't
2648 * know what's going to happen, we choose the latter for now.
2649 */
2650 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2651 kIemNativeGstRegUse_ReadOnly);
2652
2653 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2654 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2655 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2656 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2657
2658 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2659 return off;
2660}
2661
2662
2663#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
2664 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2665
2666#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
2667 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2668
2669/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
2670 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
2671DECL_INLINE_THROW(uint32_t)
2672iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2673{
2674 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2675 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2676 Assert(iGReg < 16);
2677
2678 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2679 kIemNativeGstRegUse_ReadOnly);
2680
2681 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2682 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2683 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
2684 /** @todo name the register a shadow one already? */
2685 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2686
2687 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2688 return off;
2689}
2690
2691
2692
2693/*********************************************************************************************************************************
2694* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
2695*********************************************************************************************************************************/
2696
2697#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
2698 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
2699
2700/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
2701DECL_INLINE_THROW(uint32_t)
2702iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
2703{
2704 Assert(iGRegEx < 20);
2705 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2706 kIemNativeGstRegUse_ForUpdate);
2707#ifdef RT_ARCH_AMD64
2708 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2709
2710 /* To the lowest byte of the register: mov r8, imm8 */
2711 if (iGRegEx < 16)
2712 {
2713 if (idxGstTmpReg >= 8)
2714 pbCodeBuf[off++] = X86_OP_REX_B;
2715 else if (idxGstTmpReg >= 4)
2716 pbCodeBuf[off++] = X86_OP_REX;
2717 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2718 pbCodeBuf[off++] = u8Value;
2719 }
2720 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise we rotate. */
2721 else if (idxGstTmpReg < 4)
2722 {
2723 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
2724 pbCodeBuf[off++] = u8Value;
2725 }
2726 else
2727 {
2728 /* ror reg64, 8 */
2729 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2730 pbCodeBuf[off++] = 0xc1;
2731 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
2732 pbCodeBuf[off++] = 8;
2733
2734 /* mov reg8, imm8 */
2735 if (idxGstTmpReg >= 8)
2736 pbCodeBuf[off++] = X86_OP_REX_B;
2737 else if (idxGstTmpReg >= 4)
2738 pbCodeBuf[off++] = X86_OP_REX;
2739 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2740 pbCodeBuf[off++] = u8Value;
2741
2742 /* rol reg64, 8 */
2743 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2744 pbCodeBuf[off++] = 0xc1;
2745 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
2746 pbCodeBuf[off++] = 8;
2747 }
2748
2749#elif defined(RT_ARCH_ARM64)
2750 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
2751 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2752 if (iGRegEx < 16)
2753 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
2754 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
2755 else
2756 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
2757 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
2758 iemNativeRegFreeTmp(pReNative, idxImmReg);
2759
2760#else
2761# error "Port me!"
2762#endif
2763
2764 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2765
2766 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
2767
2768 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2769 return off;
2770}
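/* The rotate trick in the AMD64 path above, spelled out for a concrete case:
 * if the guest register happens to be shadowed in host r9 and the target is
 * its AH-style high byte, the emitted bytes correspond roughly to
 *
 *      ror r9, 8          ; bits 15:8 now sit in r9b
 *      mov r9b, imm8
 *      rol r9, 8          ; restore the original byte order
 *
 * which avoids needing an addressable high-byte register on the host. */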
2771
2772
2773#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
2774 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
2775
2776/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
2777DECL_INLINE_THROW(uint32_t)
2778iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
2779{
2780 Assert(iGRegEx < 20);
2781 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
2782
2783 /*
2784 * If it's a constant value (unlikely) we treat this as an
2785 * IEM_MC_STORE_GREG_U8_CONST statement.
2786 */
2787 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
2788 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
2789 { /* likely */ }
2790 else
2791 {
2792 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
2793 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2794 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
2795 }
2796
2797 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2798 kIemNativeGstRegUse_ForUpdate);
2799 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
2800
2801#ifdef RT_ARCH_AMD64
2802 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
2803 if (iGRegEx < 16)
2804 {
2805 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
2806 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
2807 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
2808 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
2809 pbCodeBuf[off++] = X86_OP_REX;
2810 pbCodeBuf[off++] = 0x8a;
2811 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
2812 }
2813 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise we rotate. */
2814 else if (idxGstTmpReg < 4 && idxVarReg < 4)
2815 {
2816 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
2817 pbCodeBuf[off++] = 0x8a;
2818 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
2819 }
2820 else
2821 {
2822 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
2823
2824 /* ror reg64, 8 */
2825 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2826 pbCodeBuf[off++] = 0xc1;
2827 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
2828 pbCodeBuf[off++] = 8;
2829
2830 /* mov reg8, reg8(r/m) */
2831 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
2832 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
2833 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
2834 pbCodeBuf[off++] = X86_OP_REX;
2835 pbCodeBuf[off++] = 0x8a;
2836 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
2837
2838 /* rol reg64, 8 */
2839 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2840 pbCodeBuf[off++] = 0xc1;
2841 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
2842 pbCodeBuf[off++] = 8;
2843 }
2844
2845#elif defined(RT_ARCH_ARM64)
2846 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
2847 or
2848 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
2849 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2850 if (iGRegEx < 16)
2851 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
2852 else
2853 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
2854
2855#else
2856# error "Port me!"
2857#endif
2858 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2859
2860 iemNativeVarRegisterRelease(pReNative, idxValueVar);
2861
2862 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
2863 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2864 return off;
2865}
2866
2867
2868
2869#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
2870 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
2871
2872/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
2873DECL_INLINE_THROW(uint32_t)
2874iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
2875{
2876 Assert(iGReg < 16);
2877 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2878 kIemNativeGstRegUse_ForUpdate);
2879#ifdef RT_ARCH_AMD64
2880 /* mov reg16, imm16 */
2881 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
2882 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2883 if (idxGstTmpReg >= 8)
2884 pbCodeBuf[off++] = X86_OP_REX_B;
2885 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
2886 pbCodeBuf[off++] = RT_BYTE1(uValue);
2887 pbCodeBuf[off++] = RT_BYTE2(uValue);
2888
2889#elif defined(RT_ARCH_ARM64)
2890 /* movk xdst, #uValue, lsl #0 */
2891 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2892 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
2893
2894#else
2895# error "Port me!"
2896#endif
2897
2898 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2899
2900 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
2901 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2902 return off;
2903}
2904
2905
2906#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
2907 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
2908
2909/** Emits code for IEM_MC_STORE_GREG_U16. */
2910DECL_INLINE_THROW(uint32_t)
2911iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
2912{
2913 Assert(iGReg < 16);
2914 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
2915
2916 /*
2917 * If it's a constant value (unlikely) we treat this as an
2918 * IEM_MC_STORE_GREG_U16_CONST statement.
2919 */
2920 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
2921 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
2922 { /* likely */ }
2923 else
2924 {
2925 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
2926 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2927 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
2928 }
2929
2930 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2931 kIemNativeGstRegUse_ForUpdate);
2932
2933#ifdef RT_ARCH_AMD64
2934 /* mov reg16, reg16 or [mem16] */
2935 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2936 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2937 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
2938 {
2939 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
2940 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
2941 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
2942 pbCodeBuf[off++] = 0x8b;
2943 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
2944 }
2945 else
2946 {
2947 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
2948 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
2949 if (idxGstTmpReg >= 8)
2950 pbCodeBuf[off++] = X86_OP_REX_R;
2951 pbCodeBuf[off++] = 0x8b;
2952 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
2953 }
2954
2955#elif defined(RT_ARCH_ARM64)
2956 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
2957 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
2958 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2959 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
2960 iemNativeVarRegisterRelease(pReNative, idxValueVar);
2961
2962#else
2963# error "Port me!"
2964#endif
2965
2966 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2967
2968 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
2969 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2970 return off;
2971}
2972
2973
2974#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
2975 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
2976
2977/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
2978DECL_INLINE_THROW(uint32_t)
2979iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
2980{
2981 Assert(iGReg < 16);
2982 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2983 kIemNativeGstRegUse_ForFullWrite);
2984 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
2985 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
2986 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2987 return off;
2988}
2989
2990
2991#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
2992 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
2993
2994/** Emits code for IEM_MC_STORE_GREG_U32. */
2995DECL_INLINE_THROW(uint32_t)
2996iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
2997{
2998 Assert(iGReg < 16);
2999 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3000
3001 /*
3002 * If it's a constant value (unlikely) we treat this as an
3003 * IEM_MC_STORE_GREG_U32_CONST statement.
3004 */
3005 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3006 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3007 { /* likely */ }
3008 else
3009 {
3010 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3011 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3012 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
3013 }
3014
3015 /*
3016 * For the rest we allocate a guest register for the variable and write
3017 * it to the CPUMCTX structure.
3018 */
3019 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3020 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3021#ifdef VBOX_STRICT
3022 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
3023#endif
3024 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3025 return off;
3026}
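/* Note: the 64-bit store above is intentional - a 32-bit GPR write zero
 * extends into the upper half on x86-64, and the variable's host register is
 * expected to already hold a zero-extended value here (the VBOX_STRICT check
 * emits code verifying that bits 63:32 are clear). */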
3027
3028
3029#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
3030 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
3031
3032/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
3033DECL_INLINE_THROW(uint32_t)
3034iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
3035{
3036 Assert(iGReg < 16);
3037 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3038 kIemNativeGstRegUse_ForFullWrite);
3039 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
3040 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3041 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3042 return off;
3043}
3044
3045
3046#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
3047 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
3048
3049/** Emits code for IEM_MC_STORE_GREG_U64. */
3050DECL_INLINE_THROW(uint32_t)
3051iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3052{
3053 Assert(iGReg < 16);
3054 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3055
3056 /*
3057 * If it's a constant value (unlikely) we treat this as an
3058 * IEM_MC_STORE_GREG_U64_CONST statement.
3059 */
3060 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3061 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3062 { /* likely */ }
3063 else
3064 {
3065 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3066 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3067 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
3068 }
3069
3070 /*
3071 * For the rest we allocate a guest register for the variable and write
3072 * it to the CPUMCTX structure.
3073 */
3074 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3075 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3076 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3077 return off;
3078}
3079
3080
3081#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
3082 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
3083
3084/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
3085DECL_INLINE_THROW(uint32_t)
3086iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
3087{
3088 Assert(iGReg < 16);
3089 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3090 kIemNativeGstRegUse_ForUpdate);
3091 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
3092 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3093 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3094 return off;
3095}
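/* A 32-bit register-to-itself move is all that is needed here, since such a
 * move zero extends on both hosts - presumably a 'mov eax, eax' style move on
 * AMD64 and a 32-bit 'mov w0, w0' style ORR on ARM64. */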
3096
3097
3098/*********************************************************************************************************************************
3099* General purpose register manipulation (add, sub). *
3100*********************************************************************************************************************************/
3101
3102#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
3103 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
3104
3105/** Emits code for IEM_MC_ADD_GREG_U16. */
3106DECL_INLINE_THROW(uint32_t)
3107iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
3108{
3109 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3110 kIemNativeGstRegUse_ForUpdate);
3111
3112#ifdef RT_ARCH_AMD64
3113 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3114 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3115 if (idxGstTmpReg >= 8)
3116 pbCodeBuf[off++] = X86_OP_REX_B;
3117 if (uAddend == 1)
3118 {
3119 pbCodeBuf[off++] = 0xff; /* inc */
3120 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3121 }
3122 else
3123 {
3124 pbCodeBuf[off++] = 0x81;
3125 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3126 pbCodeBuf[off++] = uAddend;
3127 pbCodeBuf[off++] = 0;
3128 }
3129
3130#else
3131 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3132 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3133
3134 /* add tmp, gstgrp, uAddend */
3135 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
3136
3137 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
3138 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3139
3140 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3141#endif
3142
3143 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3144
3145 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3146
3147 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3148 return off;
3149}
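/* The 16-bit form must leave bits 63:16 of the guest register untouched,
 * which is why the ARM64 path adds into a temporary and merges only bits 15:0
 * back with BFI, while the AMD64 path relies on the operand-size prefix
 * making the inc/add itself a 16-bit operation. */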
3150
3151
3152#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
3153 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3154
3155#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
3156 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3157
3158/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
3159DECL_INLINE_THROW(uint32_t)
3160iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
3161{
3162 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3163 kIemNativeGstRegUse_ForUpdate);
3164
3165#ifdef RT_ARCH_AMD64
3166 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3167 if (f64Bit)
3168 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3169 else if (idxGstTmpReg >= 8)
3170 pbCodeBuf[off++] = X86_OP_REX_B;
3171 if (uAddend == 1)
3172 {
3173 pbCodeBuf[off++] = 0xff; /* inc */
3174 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3175 }
3176 else if (uAddend < 128)
3177 {
3178 pbCodeBuf[off++] = 0x83; /* add */
3179 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3180 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3181 }
3182 else
3183 {
3184 pbCodeBuf[off++] = 0x81; /* add */
3185 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3186 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3187 pbCodeBuf[off++] = 0;
3188 pbCodeBuf[off++] = 0;
3189 pbCodeBuf[off++] = 0;
3190 }
3191
3192#else
3193 /* add gstgrp, gstgrp, uAddend */
3194 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3195 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
3196
3197#endif
3198
3199 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3200
3201 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3202
3203 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3204 return off;
3205}
3206
3207
3208
3209#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
3210 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
3211
3212/** Emits code for IEM_MC_SUB_GREG_U16. */
3213DECL_INLINE_THROW(uint32_t)
3214iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
3215{
3216 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3217 kIemNativeGstRegUse_ForUpdate);
3218
3219#ifdef RT_ARCH_AMD64
3220 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3221 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3222 if (idxGstTmpReg >= 8)
3223 pbCodeBuf[off++] = X86_OP_REX_B;
3224 if (uSubtrahend == 1)
3225 {
3226 pbCodeBuf[off++] = 0xff; /* dec */
3227 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3228 }
3229 else
3230 {
3231 pbCodeBuf[off++] = 0x81;
3232 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3233 pbCodeBuf[off++] = uSubtrahend;
3234 pbCodeBuf[off++] = 0;
3235 }
3236
3237#else
3238 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3239 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3240
3241 /* sub tmp, gstgrp, uSubtrahend */
3242 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
3243
3244 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
3245 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3246
3247 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3248#endif
3249
3250 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3251
3252 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3253
3254 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3255 return off;
3256}
3257
3258
3259#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
3260 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3261
3262#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
3263 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3264
3265/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
3266DECL_INLINE_THROW(uint32_t)
3267iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
3268{
3269 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3270 kIemNativeGstRegUse_ForUpdate);
3271
3272#ifdef RT_ARCH_AMD64
3273 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3274 if (f64Bit)
3275 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3276 else if (idxGstTmpReg >= 8)
3277 pbCodeBuf[off++] = X86_OP_REX_B;
3278 if (uSubtrahend == 1)
3279 {
3280 pbCodeBuf[off++] = 0xff; /* dec */
3281 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3282 }
3283 else if (uSubtrahend < 128)
3284 {
3285 pbCodeBuf[off++] = 0x83; /* sub */
3286 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3287 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3288 }
3289 else
3290 {
3291 pbCodeBuf[off++] = 0x81; /* sub */
3292 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3293 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3294 pbCodeBuf[off++] = 0;
3295 pbCodeBuf[off++] = 0;
3296 pbCodeBuf[off++] = 0;
3297 }
3298
3299#else
3300 /* sub gstgrp, gstgrp, uSubtrahend */
3301 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3302 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
3303
3304#endif
3305
3306 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3307
3308 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3309
3310 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3311 return off;
3312}
3313
3314
3315/*********************************************************************************************************************************
3316* Local variable manipulation (add, sub, and, or). *
3317*********************************************************************************************************************************/
3318
3319#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
3320 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3321
3322#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
3323 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3324
3325#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
3326 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3327
3328#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
3329 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3330
3331/** Emits code for AND'ing a local and a constant value. */
3332DECL_INLINE_THROW(uint32_t)
3333iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3334{
3335#ifdef VBOX_STRICT
3336 switch (cbMask)
3337 {
3338 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3339 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3340 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3341 case sizeof(uint64_t): break;
3342 default: AssertFailedBreak();
3343 }
3344#endif
3345
3346 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3347 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3348
3349 if (cbMask <= sizeof(uint32_t))
3350 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
3351 else
3352 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
3353
3354 iemNativeVarRegisterRelease(pReNative, idxVar);
3355 return off;
3356}
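/* Typical use, sketched with a made-up local name: a 32-bit mask applied to
 * a local typically ends up as a single and-immediate on the variable's host
 * register, e.g.
 *
 *      IEM_MC_LOCAL(uint32_t, u32Tmp);
 *      ...
 *      IEM_MC_AND_LOCAL_U32(u32Tmp, UINT32_C(0xffff));
 *
 * which expands to the iemNativeEmitAndLocal() call above with
 * cbMask = sizeof(uint32_t). */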
3357
3358
3359#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
3360 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3361
3362#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
3363 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3364
3365#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
3366 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3367
3368#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
3369 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3370
3371/** Emits code for OR'ing a local and a constant value. */
3372DECL_INLINE_THROW(uint32_t)
3373iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3374{
3375#ifdef VBOX_STRICT
3376 switch (cbMask)
3377 {
3378 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3379 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3380 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3381 case sizeof(uint64_t): break;
3382 default: AssertFailedBreak();
3383 }
3384#endif
3385
3386 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3387 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3388
3389 if (cbMask <= sizeof(uint32_t))
3390 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
3391 else
3392 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
3393
3394 iemNativeVarRegisterRelease(pReNative, idxVar);
3395 return off;
3396}
3397
3398
3399#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
3400 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
3401
3402#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
3403 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
3404
3405#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
3406 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
3407
3408/** Emits code for reversing the byte order in a local value. */
3409DECL_INLINE_THROW(uint32_t)
3410iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
3411{
3412 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3413 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3414
3415 switch (cbLocal)
3416 {
3417 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
3418 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
3419 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
3420 default: AssertFailedBreak();
3421 }
3422
3423 iemNativeVarRegisterRelease(pReNative, idxVar);
3424 return off;
3425}
3426
3427
3428
3429/*********************************************************************************************************************************
3430* EFLAGS *
3431*********************************************************************************************************************************/
3432
3433#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
3434# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
3435#else
3436# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
3437 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
3438
3439DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
3440{
3441 if (fEflOutput)
3442 {
3443 PVMCPUCC const pVCpu = pReNative->pVCpu;
3444# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3445 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
3446 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
3447 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
3448# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
3449 if (fEflOutput & (a_fEfl)) \
3450 { \
3451 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
3452 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
3453 else \
3454 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
3455 } else do { } while (0)
3456# else
3457 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
3458 IEMLIVENESSBIT const LivenessClobbered =
3459 {
3460 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3461 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3462 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
3463 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
3464 };
3465 IEMLIVENESSBIT const LivenessDelayable =
3466 {
3467 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3468 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
3469 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3470 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
3471 };
3472# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
3473 if (fEflOutput & (a_fEfl)) \
3474 { \
3475 if (LivenessClobbered.a_fLivenessMember) \
3476 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
3477 else if (LivenessDelayable.a_fLivenessMember) \
3478 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
3479 else \
3480 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
3481 } else do { } while (0)
3482# endif
3483 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
3484 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
3485 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
3486 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
3487 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
3488 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
3489 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
3490# undef CHECK_FLAG_AND_UPDATE_STATS
3491 }
3492 RT_NOREF(fEflInput);
3493}
3494#endif /* VBOX_WITH_STATISTICS */
3495
3496#undef IEM_MC_FETCH_EFLAGS /* should not be used */
3497#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
3498 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
3499
3500/** Handles IEM_MC_FETCH_EFLAGS_EX. */
3501DECL_INLINE_THROW(uint32_t)
3502iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
3503 uint32_t fEflInput, uint32_t fEflOutput)
3504{
3505 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
3506 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
3507 RT_NOREF(fEflInput, fEflOutput);
3508
3509#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3510# ifdef VBOX_STRICT
3511 if ( pReNative->idxCurCall != 0
3512 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
3513 {
3514 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
3515 uint32_t const fBoth = fEflInput | fEflOutput;
3516# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
3517 AssertMsg( !(fBoth & (a_fElfConst)) \
3518 || (!(fEflInput & (a_fElfConst)) \
3519 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
3520 : !(fEflOutput & (a_fElfConst)) \
3521 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
3522 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
3523 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
3524 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
3525 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
3526 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
3527 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
3528 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
3529 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
3530 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
3531# undef ASSERT_ONE_EFL
3532 }
3533# endif
3534#endif
3535
3536 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
3537
3538 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
3539 * the existing shadow copy. */
3540 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
3541 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
3542 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
3543 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
3544 return off;
3545}
3546
3547
3548
3549/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
3550 * start using it with custom native code emission (inlining assembly
3551 * instruction helpers). */
3552#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
3553#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
3554 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
3555 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
3556
3557#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
3558#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
3559 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
3560 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
3561
3562/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
3563DECL_INLINE_THROW(uint32_t)
3564iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
3565 bool fUpdateSkipping)
3566{
3567 RT_NOREF(fEflOutput);
3568 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
3569 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
3570
3571#ifdef VBOX_STRICT
3572 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
3573 uint32_t offFixup = off;
3574 off = iemNativeEmitJnzToFixed(pReNative, off, off);
3575 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
3576 iemNativeFixupFixedJump(pReNative, offFixup, off);
3577
3578 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
3579 offFixup = off;
3580 off = iemNativeEmitJzToFixed(pReNative, off, off);
3581 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
3582 iemNativeFixupFixedJump(pReNative, offFixup, off);
3583
3584 /** @todo validate that only bits in the fEflOutput mask changed. */
3585#endif
3586
3587#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
3588 if (fUpdateSkipping)
3589 {
3590 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
3591 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
3592 else
3593 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
3594 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
3595 }
3596#else
3597 RT_NOREF_PV(fUpdateSkipping);
3598#endif
3599
3600 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
3601 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
3602 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
3603 return off;
3604}
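/* The VBOX_STRICT block above sanity checks the value being committed: the
 * reserved-one bit (X86_EFL_RA1_MASK) must be set and the reserved-zero bits
 * must be clear, otherwise execution runs into the 0x2001/0x2002 breakpoints
 * emitted via iemNativeEmitBrk. */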
3605
3606
3607
3608/*********************************************************************************************************************************
3609* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
3610*********************************************************************************************************************************/
3611
3612#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
3613 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
3614
3615#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
3616 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
3617
3618#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
3619 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
3620
3621
3622/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
3623 * IEM_MC_FETCH_SREG_ZX_U64. */
3624DECL_INLINE_THROW(uint32_t)
3625iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
3626{
3627 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3628 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
3629 Assert(iSReg < X86_SREG_COUNT);
3630
3631 /*
3632 * For now, we will not create a shadow copy of a selector. The rationale
3633 * is that, since we do not recompile the popping and loading of segment
3634 * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for
3635 * pushing and moving to registers, there is only a small chance that the
3636 * shadow copy will be accessed again before the register is reloaded. One
3637 * scenario would be nested calls in 16-bit code, but I doubt it's worth
3638 * the extra register pressure atm.
3639 *
3640 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
3641 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
3642 * store scenario covered at present (r160730).
3643 */
3644 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3645 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3646 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
3647 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3648 return off;
3649}
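/* The 16-bit load of the selector presumably zero extends the value in the
 * host register, which is why this one emitter can also serve the
 * IEM_MC_FETCH_SREG_ZX_U32 and IEM_MC_FETCH_SREG_ZX_U64 variants without
 * any extra work. */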
3650
3651
3652
3653/*********************************************************************************************************************************
3654* Register references. *
3655*********************************************************************************************************************************/
3656
3657#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
3658 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
3659
3660#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
3661 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
3662
3663/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
3664DECL_INLINE_THROW(uint32_t)
3665iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
3666{
3667 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
3668 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
3669 Assert(iGRegEx < 20);
3670
3671 if (iGRegEx < 16)
3672 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
3673 else
3674 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
3675
3676 /* If we've delayed writing back the register value, flush it now. */
3677 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
3678
3679 /* If it's not a const reference we need to flush the shadow copy of the register now. */
3680 if (!fConst)
3681 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
3682
3683 return off;
3684}
3685
3686#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
3687 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
3688
3689#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
3690 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
3691
3692#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
3693 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
3694
3695#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
3696 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
3697
3698#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
3699 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
3700
3701#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
3702 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
3703
3704#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
3705 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
3706
3707#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
3708 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
3709
3710#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
3711 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
3712
3713#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
3714 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
3715
3716/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
3717DECL_INLINE_THROW(uint32_t)
3718iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
3719{
3720 Assert(iGReg < 16);
3721 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
3722 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
3723
3724 /* If we've delayed writing back the register value, flush it now. */
3725 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
3726
3727 /* If it's not a const reference we need to flush the shadow copy of the register now. */
3728 if (!fConst)
3729 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
3730
3731 return off;
3732}
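/* The reference emitters above do not generate any code for the reference
 * itself; the variable is merely retyped as a guest register reference and
 * the actual pointer into CPUMCTX is only computed later, typically when the
 * variable is passed as an argument to a C helper.  What must happen right
 * away is flushing any delayed write of the register and, for non-const
 * references, dropping its host shadow copies, since the helper may modify
 * the guest value in CPUMCTX behind the recompiler's back. */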
3733
3734
3735#undef IEM_MC_REF_EFLAGS /* should not be used. */
3736#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
3737 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
3738 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
3739
3740/** Handles IEM_MC_REF_EFLAGS. */
3741DECL_INLINE_THROW(uint32_t)
3742iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
3743{
3744 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
3745 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
3746
3747#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
3748 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
3749
3750 /* Updating the skipping according to the outputs is a little early, but
3751 we don't have any other hooks for references atm. */
3752 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
3753 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
3754 else if (fEflOutput & X86_EFL_STATUS_BITS)
3755 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
3756 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
3757#else
3758 RT_NOREF(fEflInput, fEflOutput);
3759#endif
3760
3761 /* If we've delayed writing back the register value, flush it now. */
3762 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
3763
3764 /* If there is a shadow copy of guest EFLAGS, flush it now. */
3765 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
3766
3767 return off;
3768}
3769
3770
3771/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
3772 * different code from the threaded recompiler, maybe it would be helpful. For now
3773 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
3774#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
3775
3776
3777#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
3778 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
3779
3780#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
3781 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
3782
3783#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
3784 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
3785
3786/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
3787DECL_INLINE_THROW(uint32_t)
3788iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
3789{
3790 Assert(iXReg < 16);
3791 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
3792 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
3793
3794 /* If we've delayed writing back the register value, flush it now. */
3795 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
3796
3797#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3798 /* If it's not a const reference we need to flush the shadow copy of the register now. */
3799 if (!fConst)
3800 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
3801#else
3802 RT_NOREF(fConst);
3803#endif
3804
3805 return off;
3806}
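/* Note: without IEMNATIVE_WITH_SIMD_REG_ALLOCATOR the XMM registers are never
   shadowed in host SIMD registers, so there is no shadow copy to invalidate
   for a writable reference and fConst is simply unused. */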
3807
3808
3809#define IEM_MC_REF_MXCSR(a_pfMxcsr) \
3810 off = iemNativeEmitRefMxcsr(pReNative, off, a_pfMxcsr)
3811
3812/** Handles IEM_MC_REF_MXCSR. */
3813DECL_INLINE_THROW(uint32_t)
3814iemNativeEmitRefMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
3815{
3816 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_MxCsr, 0);
3817 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
3818
3819 /* If we've delayed writing back the register value, flush it now. */
3820 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_MxCsr, 0);
3821
3822 /* If there is a shadow copy of guest MXCSR, flush it now. */
3823 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_MxCsr));
3824
3825 return off;
3826}
3827
3828
3829
3830/*********************************************************************************************************************************
3831* Effective Address Calculation *
3832*********************************************************************************************************************************/
3833#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
3834 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
3835
3836/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
3837 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
3838DECL_INLINE_THROW(uint32_t)
3839iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3840 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
3841{
3842 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
3843
3844 /*
3845 * Handle the disp16 form with no registers first.
3846 *
3847 * Convert to an immediate value, as that'll delay the register allocation
3848 * and assignment till the memory access / call / whatever and we can use
3849 * a more appropriate register (or none at all).
3850 */
3851 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
3852 {
3853 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
3854 return off;
3855 }
3856
3857    /* Determine the displacement. */
3858 uint16_t u16EffAddr;
3859 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
3860 {
3861 case 0: u16EffAddr = 0; break;
3862 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
3863 case 2: u16EffAddr = u16Disp; break;
3864 default: AssertFailedStmt(u16EffAddr = 0);
3865 }
3866
3867 /* Determine the registers involved. */
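    /* For reference, the 16-bit ModR/M r/m encodings decoded below are:
       0=[BX+SI], 1=[BX+DI], 2=[BP+SI], 3=[BP+DI], 4=[SI], 5=[DI],
       6=[BP] (the mod=0 disp16-only form was handled above), 7=[BX]. */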
3868 uint8_t idxGstRegBase;
3869 uint8_t idxGstRegIndex;
3870 switch (bRm & X86_MODRM_RM_MASK)
3871 {
3872 case 0:
3873 idxGstRegBase = X86_GREG_xBX;
3874 idxGstRegIndex = X86_GREG_xSI;
3875 break;
3876 case 1:
3877 idxGstRegBase = X86_GREG_xBX;
3878 idxGstRegIndex = X86_GREG_xDI;
3879 break;
3880 case 2:
3881 idxGstRegBase = X86_GREG_xBP;
3882 idxGstRegIndex = X86_GREG_xSI;
3883 break;
3884 case 3:
3885 idxGstRegBase = X86_GREG_xBP;
3886 idxGstRegIndex = X86_GREG_xDI;
3887 break;
3888 case 4:
3889 idxGstRegBase = X86_GREG_xSI;
3890 idxGstRegIndex = UINT8_MAX;
3891 break;
3892 case 5:
3893 idxGstRegBase = X86_GREG_xDI;
3894 idxGstRegIndex = UINT8_MAX;
3895 break;
3896 case 6:
3897 idxGstRegBase = X86_GREG_xBP;
3898 idxGstRegIndex = UINT8_MAX;
3899 break;
3900#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
3901 default:
3902#endif
3903 case 7:
3904 idxGstRegBase = X86_GREG_xBX;
3905 idxGstRegIndex = UINT8_MAX;
3906 break;
3907 }
3908
3909 /*
3910 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
3911 */
3912 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
3913 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
3914 kIemNativeGstRegUse_ReadOnly);
3915 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
3916 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
3917 kIemNativeGstRegUse_ReadOnly)
3918 : UINT8_MAX;
3919#ifdef RT_ARCH_AMD64
3920 if (idxRegIndex == UINT8_MAX)
3921 {
3922 if (u16EffAddr == 0)
3923 {
3924            /* movzx ret, base */
3925 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
3926 }
3927 else
3928 {
3929 /* lea ret32, [base64 + disp32] */
3930 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
3931 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
3932 if (idxRegRet >= 8 || idxRegBase >= 8)
3933 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
3934 pbCodeBuf[off++] = 0x8d;
3935 if (idxRegBase != X86_GREG_x12 /*SIB*/)
3936 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
3937 else
3938 {
3939 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
3940 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
3941 }
3942 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
3943 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
3944 pbCodeBuf[off++] = 0;
3945 pbCodeBuf[off++] = 0;
3946 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3947
3948 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
3949 }
3950 }
3951 else
3952 {
3953 /* lea ret32, [index64 + base64 (+ disp32)] */
3954 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
3955 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
3956 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
3957 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
3958 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
3959 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
3960 pbCodeBuf[off++] = 0x8d;
3961 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
3962 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
3963 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
3964 if (bMod == X86_MOD_MEM4)
3965 {
3966 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
3967 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
3968 pbCodeBuf[off++] = 0;
3969 pbCodeBuf[off++] = 0;
3970 }
3971 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3972 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
3973 }
3974
3975#elif defined(RT_ARCH_ARM64)
3976 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
3977 if (u16EffAddr == 0)
3978 {
3979 if (idxRegIndex == UINT8_MAX)
3980 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
3981 else
3982 {
3983 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
3984 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
3985 }
3986 }
3987 else
3988 {
3989 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
3990 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
3991 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
3992 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
3993 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
3994 else
3995 {
3996 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
3997 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
3998 }
3999 if (idxRegIndex != UINT8_MAX)
4000 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
4001 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
4002 }
4003
4004#else
4005# error "port me"
4006#endif
4007
4008 if (idxRegIndex != UINT8_MAX)
4009 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4010 iemNativeRegFreeTmp(pReNative, idxRegBase);
4011 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4012 return off;
4013}
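/* Worked example (illustrative): for "mov ax, [bp+si+10h]" the threaded
   decoder hands us bRm=0x42 (mod=1, reg=0, r/m=2) and u16Disp=0x0010 (the
   sign-extended disp8), so the emitter above allocates read-only copies of
   xBP and xSI and produces the equivalent of
       idxRegRet = (uint16_t)(GstBP + GstSI + 0x0010);
   with the upper bits of the host register cleared. */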
4014
4015
4016#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
4017 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
4018
4019/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
4020 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
4021DECL_INLINE_THROW(uint32_t)
4022iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4023 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
4024{
4025 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4026
4027 /*
4028 * Handle the disp32 form with no registers first.
4029 *
4030 * Convert to an immediate value, as that'll delay the register allocation
4031 * and assignment till the memory access / call / whatever and we can use
4032 * a more appropriate register (or none at all).
4033 */
4034 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4035 {
4036 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
4037 return off;
4038 }
4039
4040    /* Calculate the fixed displacement (more on this further down for the SIB.B=4 and SIB.B=5 cases). */
4041 uint32_t u32EffAddr = 0;
4042 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4043 {
4044 case 0: break;
4045 case 1: u32EffAddr = (int8_t)u32Disp; break;
4046 case 2: u32EffAddr = u32Disp; break;
4047 default: AssertFailed();
4048 }
4049
4050 /* Get the register (or SIB) value. */
4051 uint8_t idxGstRegBase = UINT8_MAX;
4052 uint8_t idxGstRegIndex = UINT8_MAX;
4053 uint8_t cShiftIndex = 0;
4054 switch (bRm & X86_MODRM_RM_MASK)
4055 {
4056 case 0: idxGstRegBase = X86_GREG_xAX; break;
4057 case 1: idxGstRegBase = X86_GREG_xCX; break;
4058 case 2: idxGstRegBase = X86_GREG_xDX; break;
4059 case 3: idxGstRegBase = X86_GREG_xBX; break;
4060 case 4: /* SIB */
4061 {
4062            /* index with scaling. */
4063 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4064 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4065 {
4066 case 0: idxGstRegIndex = X86_GREG_xAX; break;
4067 case 1: idxGstRegIndex = X86_GREG_xCX; break;
4068 case 2: idxGstRegIndex = X86_GREG_xDX; break;
4069 case 3: idxGstRegIndex = X86_GREG_xBX; break;
4070 case 4: cShiftIndex = 0; /*no index*/ break;
4071 case 5: idxGstRegIndex = X86_GREG_xBP; break;
4072 case 6: idxGstRegIndex = X86_GREG_xSI; break;
4073 case 7: idxGstRegIndex = X86_GREG_xDI; break;
4074 }
4075
4076 /* base */
4077 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
4078 {
4079 case 0: idxGstRegBase = X86_GREG_xAX; break;
4080 case 1: idxGstRegBase = X86_GREG_xCX; break;
4081 case 2: idxGstRegBase = X86_GREG_xDX; break;
4082 case 3: idxGstRegBase = X86_GREG_xBX; break;
4083 case 4:
4084 idxGstRegBase = X86_GREG_xSP;
4085 u32EffAddr += uSibAndRspOffset >> 8;
4086 break;
4087 case 5:
4088 if ((bRm & X86_MODRM_MOD_MASK) != 0)
4089 idxGstRegBase = X86_GREG_xBP;
4090 else
4091 {
4092 Assert(u32EffAddr == 0);
4093 u32EffAddr = u32Disp;
4094 }
4095 break;
4096 case 6: idxGstRegBase = X86_GREG_xSI; break;
4097 case 7: idxGstRegBase = X86_GREG_xDI; break;
4098 }
4099 break;
4100 }
4101 case 5: idxGstRegBase = X86_GREG_xBP; break;
4102 case 6: idxGstRegBase = X86_GREG_xSI; break;
4103 case 7: idxGstRegBase = X86_GREG_xDI; break;
4104 }
4105
4106 /*
4107     * If no registers are involved (SIB.B=5, SIB.X=4), repeat what we did at
4108 * the start of the function.
4109 */
4110 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4111 {
4112 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
4113 return off;
4114 }
4115
4116 /*
4117 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4118 */
4119 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4120 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
4121 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4122 kIemNativeGstRegUse_ReadOnly);
4123 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
4124 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4125 kIemNativeGstRegUse_ReadOnly);
4126
4127 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
4128 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
4129 {
4130 idxRegBase = idxRegIndex;
4131 idxRegIndex = UINT8_MAX;
4132 }
4133
4134#ifdef RT_ARCH_AMD64
4135 if (idxRegIndex == UINT8_MAX)
4136 {
4137 if (u32EffAddr == 0)
4138 {
4139 /* mov ret, base */
4140 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4141 }
4142 else
4143 {
4144 /* lea ret32, [base64 + disp32] */
4145 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4146 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4147 if (idxRegRet >= 8 || idxRegBase >= 8)
4148 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
4149 pbCodeBuf[off++] = 0x8d;
4150 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4151 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4152 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
4153 else
4154 {
4155 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4156 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4157 }
4158 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4159 if (bMod == X86_MOD_MEM4)
4160 {
4161 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4162 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4163 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4164 }
4165 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4166 }
4167 }
4168 else
4169 {
4170 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4171 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4172 if (idxRegBase == UINT8_MAX)
4173 {
4174 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
4175 if (idxRegRet >= 8 || idxRegIndex >= 8)
4176 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4177 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4178 pbCodeBuf[off++] = 0x8d;
4179 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
4180 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
4181 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4182 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4183 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4184 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4185 }
4186 else
4187 {
4188 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
4189 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4190 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4191 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4192 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4193 pbCodeBuf[off++] = 0x8d;
4194 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
4195 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4196 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4197 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
4198 if (bMod != X86_MOD_MEM0)
4199 {
4200 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4201 if (bMod == X86_MOD_MEM4)
4202 {
4203 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4204 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4205 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4206 }
4207 }
4208 }
4209 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4210 }
4211
4212#elif defined(RT_ARCH_ARM64)
4213 if (u32EffAddr == 0)
4214 {
4215 if (idxRegIndex == UINT8_MAX)
4216 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4217 else if (idxRegBase == UINT8_MAX)
4218 {
4219 if (cShiftIndex == 0)
4220 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
4221 else
4222 {
4223 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4224 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
4225 }
4226 }
4227 else
4228 {
4229 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4230 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
4231 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4232 }
4233 }
4234 else
4235 {
4236 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
4237 {
4238 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4239 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
4240 }
4241 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
4242 {
4243 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4244 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
4245 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
4246 }
4247 else
4248 {
4249 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
4250 if (idxRegBase != UINT8_MAX)
4251 {
4252 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4253 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
4254 }
4255 }
4256 if (idxRegIndex != UINT8_MAX)
4257 {
4258 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4259 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
4260 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4261 }
4262 }
4263
4264#else
4265# error "port me"
4266#endif
4267
4268 if (idxRegIndex != UINT8_MAX)
4269 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4270 if (idxRegBase != UINT8_MAX)
4271 iemNativeRegFreeTmp(pReNative, idxRegBase);
4272 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4273 return off;
4274}
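/* Worked example (illustrative): for "mov eax, [ebx+ecx*2+7Fh]" we get
   bRm=0x44 (mod=1, rm=4 -> SIB), a SIB byte of 0x4B (scale=1, index=ECX,
   base=EBX) in the low 8 bits of uSibAndRspOffset, and u32Disp=0x7F, so the
   emitter above produces the equivalent of
       idxRegRet = (uint32_t)(GstEBX + (GstECX << 1) + 0x7F);
   as a single LEA on AMD64, or as an add-immediate followed by a shifted
   register add on ARM64. */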
4275
4276
4277#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4278 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4279 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4280
4281#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4282 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4283 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4284
4285#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4286 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4287 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
4288
4289/**
4290 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
4291 *
4292 * @returns New off.
4293 * @param pReNative The native recompile state.
4294 * @param off The current code buffer offset.
4295 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
4296 * bit 4 to REX.X. The two bits are part of the
4297 * REG sub-field, which isn't needed in this
4298 * function.
4299 * @param uSibAndRspOffset Two parts:
4300 * - The first 8 bits make up the SIB byte.
4301 * - The next 8 bits are the fixed RSP/ESP offset
4302 * in case of a pop [xSP].
4303 * @param u32Disp The displacement byte/word/dword, if any.
4304 * @param cbInstr The size of the fully decoded instruction. Used
4305 * for RIP relative addressing.
4306 * @param idxVarRet The result variable number.
4307 * @param f64Bit Whether to use a 64-bit or 32-bit address size
4308 * when calculating the address.
4309 *
4310 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
4311 */
4312DECL_INLINE_THROW(uint32_t)
4313iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
4314 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
4315{
4316 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4317
4318 /*
4319 * Special case the rip + disp32 form first.
4320 */
4321 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4322 {
4323#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
4324        /* Need to take the current PC offset into account for the displacement; there is no need to flush
4325         * here as the PC is only read and no branching or helper calls are involved. */
4326 u32Disp += pReNative->Core.offPc;
4327#endif
4328
4329 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4330 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
4331 kIemNativeGstRegUse_ReadOnly);
4332#ifdef RT_ARCH_AMD64
4333 if (f64Bit)
4334 {
4335 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
4336 if ((int32_t)offFinalDisp == offFinalDisp)
4337 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
4338 else
4339 {
4340 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
4341 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
4342 }
4343 }
4344 else
4345 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
4346
4347#elif defined(RT_ARCH_ARM64)
4348 if (f64Bit)
4349 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4350 (int64_t)(int32_t)u32Disp + cbInstr);
4351 else
4352 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4353 (int32_t)u32Disp + cbInstr);
4354
4355#else
4356# error "Port me!"
4357#endif
4358 iemNativeRegFreeTmp(pReNative, idxRegPc);
4359 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4360 return off;
4361 }
4362
4363    /* Calculate the fixed displacement (more on this further down for the SIB.B=4 and SIB.B=5 cases). */
4364 int64_t i64EffAddr = 0;
4365 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4366 {
4367 case 0: break;
4368 case 1: i64EffAddr = (int8_t)u32Disp; break;
4369 case 2: i64EffAddr = (int32_t)u32Disp; break;
4370 default: AssertFailed();
4371 }
4372
4373 /* Get the register (or SIB) value. */
4374 uint8_t idxGstRegBase = UINT8_MAX;
4375 uint8_t idxGstRegIndex = UINT8_MAX;
4376 uint8_t cShiftIndex = 0;
4377 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
4378 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
4379 else /* SIB: */
4380 {
4381        /* index with scaling. */
4382 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4383 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4384 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
4385 if (idxGstRegIndex == 4)
4386 {
4387 /* no index */
4388 cShiftIndex = 0;
4389 idxGstRegIndex = UINT8_MAX;
4390 }
4391
4392 /* base */
4393 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
4394 if (idxGstRegBase == 4)
4395 {
4396 /* pop [rsp] hack */
4397 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
4398 }
4399 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
4400 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
4401 {
4402 /* mod=0 and base=5 -> disp32, no base reg. */
4403 Assert(i64EffAddr == 0);
4404 i64EffAddr = (int32_t)u32Disp;
4405 idxGstRegBase = UINT8_MAX;
4406 }
4407 }
4408
4409 /*
4410     * If no registers are involved (SIB.B=5, SIB.X=4), repeat what we did at
4411 * the start of the function.
4412 */
4413 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4414 {
4415 if (f64Bit)
4416 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
4417 else
4418 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
4419 return off;
4420 }
4421
4422 /*
4423 * Now emit code that calculates:
4424 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4425 * or if !f64Bit:
4426 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4427 */
4428 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4429 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
4430 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4431 kIemNativeGstRegUse_ReadOnly);
4432 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
4433 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4434 kIemNativeGstRegUse_ReadOnly);
4435
4436 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
4437 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
4438 {
4439 idxRegBase = idxRegIndex;
4440 idxRegIndex = UINT8_MAX;
4441 }
4442
4443#ifdef RT_ARCH_AMD64
4444 uint8_t bFinalAdj;
4445 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
4446 bFinalAdj = 0; /* likely */
4447 else
4448 {
4449 /* pop [rsp] with a problematic disp32 value. Split out the
4450 RSP offset and add it separately afterwards (bFinalAdj). */
4451 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
4452 Assert(idxGstRegBase == X86_GREG_xSP);
4453 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
4454 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
4455 Assert(bFinalAdj != 0);
4456 i64EffAddr -= bFinalAdj;
4457 Assert((int32_t)i64EffAddr == i64EffAddr);
4458 }
4459 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
4460//pReNative->pInstrBuf[off++] = 0xcc;
4461
4462 if (idxRegIndex == UINT8_MAX)
4463 {
4464 if (u32EffAddr == 0)
4465 {
4466 /* mov ret, base */
4467 if (f64Bit)
4468 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
4469 else
4470 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4471 }
4472 else
4473 {
4474 /* lea ret, [base + disp32] */
4475 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4476 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4477 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
4478 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4479 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4480 | (f64Bit ? X86_OP_REX_W : 0);
4481 pbCodeBuf[off++] = 0x8d;
4482 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4483 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4484 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
4485 else
4486 {
4487 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4488 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4489 }
4490 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4491 if (bMod == X86_MOD_MEM4)
4492 {
4493 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4494 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4495 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4496 }
4497 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4498 }
4499 }
4500 else
4501 {
4502 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4503 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4504 if (idxRegBase == UINT8_MAX)
4505 {
4506 /* lea ret, [(index64 << cShiftIndex) + disp32] */
4507 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
4508 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4509 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
4510 | (f64Bit ? X86_OP_REX_W : 0);
4511 pbCodeBuf[off++] = 0x8d;
4512 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
4513 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
4514 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4515 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4516 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4517 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4518 }
4519 else
4520 {
4521 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
4522 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4523 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4524 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4525 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
4526 | (f64Bit ? X86_OP_REX_W : 0);
4527 pbCodeBuf[off++] = 0x8d;
4528 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
4529 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4530 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4531 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
4532 if (bMod != X86_MOD_MEM0)
4533 {
4534 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4535 if (bMod == X86_MOD_MEM4)
4536 {
4537 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4538 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4539 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4540 }
4541 }
4542 }
4543 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4544 }
4545
4546 if (!bFinalAdj)
4547 { /* likely */ }
4548 else
4549 {
4550 Assert(f64Bit);
4551 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
4552 }
4553
4554#elif defined(RT_ARCH_ARM64)
4555 if (i64EffAddr == 0)
4556 {
4557 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4558 if (idxRegIndex == UINT8_MAX)
4559 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
4560 else if (idxRegBase != UINT8_MAX)
4561 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
4562 f64Bit, false /*fSetFlags*/, cShiftIndex);
4563 else
4564 {
4565 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
4566 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
4567 }
4568 }
4569 else
4570 {
4571 if (f64Bit)
4572 { /* likely */ }
4573 else
4574 i64EffAddr = (int32_t)i64EffAddr;
4575
4576 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
4577 {
4578 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4579 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
4580 }
4581 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
4582 {
4583 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4584 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
4585 }
4586 else
4587 {
4588 if (f64Bit)
4589 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
4590 else
4591 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
4592 if (idxRegBase != UINT8_MAX)
4593 {
4594 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4595 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
4596 }
4597 }
4598 if (idxRegIndex != UINT8_MAX)
4599 {
4600 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4601 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
4602 f64Bit, false /*fSetFlags*/, cShiftIndex);
4603 }
4604 }
4605
4606#else
4607# error "port me"
4608#endif
4609
4610 if (idxRegIndex != UINT8_MAX)
4611 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4612 if (idxRegBase != UINT8_MAX)
4613 iemNativeRegFreeTmp(pReNative, idxRegBase);
4614 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4615 return off;
4616}
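/* Worked example (illustrative): for "mov rax, [r9]" the decoder passes
   bRmEx=0x09 (rm=1 with bit 3 = REX.B set, mod=0), so idxGstRegBase resolves
   to X86_GREG_x9 and, with no index or displacement, the code above simply
   emits a 64-bit move of the guest R9 value into idxRegRet.  The RIP
   relative form (mod=0, rm=5) is special cased at the top and adds the
   disp32 plus cbInstr to the (possibly delayed) program counter instead. */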
4617
4618
4619/*********************************************************************************************************************************
4620* Memory fetches and stores common *
4621*********************************************************************************************************************************/
4622
4623typedef enum IEMNATIVEMITMEMOP
4624{
4625 kIemNativeEmitMemOp_Store = 0,
4626 kIemNativeEmitMemOp_Fetch,
4627 kIemNativeEmitMemOp_Fetch_Zx_U16,
4628 kIemNativeEmitMemOp_Fetch_Zx_U32,
4629 kIemNativeEmitMemOp_Fetch_Zx_U64,
4630 kIemNativeEmitMemOp_Fetch_Sx_U16,
4631 kIemNativeEmitMemOp_Fetch_Sx_U32,
4632 kIemNativeEmitMemOp_Fetch_Sx_U64
4633} IEMNATIVEMITMEMOP;
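/* The _Zx_/_Sx_ variants fetch cbMem bytes and zero- respectively sign-extend
   the value into the wider destination named by the suffix; e.g.
   kIemNativeEmitMemOp_Fetch_Sx_U64 with cbMem=2 loads a word and sign-extends
   it to 64 bits. */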
4634
4635/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
4636 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
4637 * (with iSegReg = UINT8_MAX). */
4638DECL_INLINE_THROW(uint32_t)
4639iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
4640 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
4641 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
4642{
4643 /*
4644 * Assert sanity.
4645 */
4646 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
4647 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
4648 Assert( enmOp != kIemNativeEmitMemOp_Store
4649 || pVarValue->enmKind == kIemNativeVarKind_Immediate
4650 || pVarValue->enmKind == kIemNativeVarKind_Stack);
4651 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
4652 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
4653 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
4654 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
4655 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4656 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
4657 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
4658 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
4659#ifdef VBOX_STRICT
4660 if (iSegReg == UINT8_MAX)
4661 {
4662 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
4663 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
4664 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
4665 switch (cbMem)
4666 {
4667 case 1:
4668 Assert( pfnFunction
4669 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
4670 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
4671 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
4672 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
4673 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
4674 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
4675 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
4676 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
4677 : UINT64_C(0xc000b000a0009000) ));
4678 break;
4679 case 2:
4680 Assert( pfnFunction
4681 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
4682 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
4683 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
4684 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
4685 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
4686 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
4687 : UINT64_C(0xc000b000a0009000) ));
4688 break;
4689 case 4:
4690 Assert( pfnFunction
4691 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
4692 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
4693 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
4694 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
4695 : UINT64_C(0xc000b000a0009000) ));
4696 break;
4697 case 8:
4698 Assert( pfnFunction
4699 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
4700 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
4701 : UINT64_C(0xc000b000a0009000) ));
4702 break;
4703 }
4704 }
4705 else
4706 {
4707 Assert(iSegReg < 6);
4708 switch (cbMem)
4709 {
4710 case 1:
4711 Assert( pfnFunction
4712 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
4713 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
4714 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
4715 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
4716 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
4717 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
4718 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
4719 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
4720 : UINT64_C(0xc000b000a0009000) ));
4721 break;
4722 case 2:
4723 Assert( pfnFunction
4724 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
4725 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
4726 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
4727 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
4728 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
4729 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
4730 : UINT64_C(0xc000b000a0009000) ));
4731 break;
4732 case 4:
4733 Assert( pfnFunction
4734 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
4735 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
4736 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
4737 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
4738 : UINT64_C(0xc000b000a0009000) ));
4739 break;
4740 case 8:
4741 Assert( pfnFunction
4742 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
4743 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
4744 : UINT64_C(0xc000b000a0009000) ));
4745 break;
4746 }
4747 }
4748#endif
4749
4750#ifdef VBOX_STRICT
4751 /*
4752 * Check that the fExec flags we've got make sense.
4753 */
4754 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
4755#endif
4756
4757 /*
4758 * To keep things simple we have to commit any pending writes first as we
4759 * may end up making calls.
4760 */
4761 /** @todo we could postpone this till we make the call and reload the
4762 * registers after returning from the call. Not sure if that's sensible or
4763 * not, though. */
4764#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
4765 off = iemNativeRegFlushPendingWrites(pReNative, off);
4766#else
4767 /* The program counter is treated differently for now. */
4768 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
4769#endif
4770
4771#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
4772 /*
4773 * Move/spill/flush stuff out of call-volatile registers.
4774 * This is the easy way out. We could contain this to the tlb-miss branch
4775 * by saving and restoring active stuff here.
4776 */
4777 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
4778#endif
4779
4780 /*
4781 * Define labels and allocate the result register (trying for the return
4782 * register if we can).
4783 */
4784 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
4785 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
4786 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
4787 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
4788 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
4789 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
4790 uint8_t const idxRegValueStore = !TlbState.fSkip
4791 && enmOp == kIemNativeEmitMemOp_Store
4792 && pVarValue->enmKind != kIemNativeVarKind_Immediate
4793 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
4794 : UINT8_MAX;
4795 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
4796 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
4797 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
4798 : UINT32_MAX;
4799
4800 /*
4801 * Jump to the TLB lookup code.
4802 */
4803 if (!TlbState.fSkip)
4804 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
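    /*
     * Rough layout of the code emitted from here on when the TLB lookup is
     * not skipped (labels are created below and by the lookup emitter):
     *
     *         jmp     TlbLookup
     *     TlbMiss:
     *         <update PC, save volatile vars, call pfnFunction, restore>
     *         jmp     TlbDone
     *     TlbLookup:
     *         <inline TLB probe; jumps back to TlbMiss on a miss>
     *         <inline store / fetch using the translated address>
     *     TlbDone:
     */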
4805
4806 /*
4807 * TlbMiss:
4808 *
4809 * Call helper to do the fetching.
4810 * We flush all guest register shadow copies here.
4811 */
4812 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
4813
4814#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4815 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
4816#else
4817 RT_NOREF(idxInstr);
4818#endif
4819
4820#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
4821 if (pReNative->Core.offPc)
4822 {
4823 /*
4824 * Update the program counter but restore it at the end of the TlbMiss branch.
4825         * This should allow delaying more program counter updates for the TlbLookup and hit paths,
4826         * which are hopefully much more frequent, reducing the number of memory accesses.
4827 */
4828 /* Allocate a temporary PC register. */
4829 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4830
4831 /* Perform the addition and store the result. */
4832 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
4833 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4834
4835 /* Free and flush the PC register. */
4836 iemNativeRegFreeTmp(pReNative, idxPcReg);
4837 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
4838 }
4839#endif
4840
4841#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
4842 /* Save variables in volatile registers. */
4843 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
4844 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
4845 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
4846 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
4847#endif
4848
4849 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
4850 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4851 if (enmOp == kIemNativeEmitMemOp_Store)
4852 {
4853 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
4854 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
4855#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
4856 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
4857#else
4858 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
4859 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
4860#endif
4861 }
4862
4863 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
4864 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
4865#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
4866 fVolGregMask);
4867#else
4868 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
4869#endif
4870
4871 if (iSegReg != UINT8_MAX)
4872 {
4873 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
4874 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
4875 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
4876 }
4877
4878 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
4879 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4880
4881 /* Done setting up parameters, make the call. */
4882 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
4883
4884 /*
4885 * Put the result in the right register if this is a fetch.
4886 */
4887 if (enmOp != kIemNativeEmitMemOp_Store)
4888 {
4889 Assert(idxRegValueFetch == pVarValue->idxReg);
4890 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
4891 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
4892 }
4893
4894#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
4895 /* Restore variables and guest shadow registers to volatile registers. */
4896 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
4897 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
4898#endif
4899
4900#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
4901 if (pReNative->Core.offPc)
4902 {
4903 /*
4904 * Time to restore the program counter to its original value.
4905 */
4906 /* Allocate a temporary PC register. */
4907 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4908
4909 /* Restore the original value. */
4910 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
4911 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4912
4913 /* Free and flush the PC register. */
4914 iemNativeRegFreeTmp(pReNative, idxPcReg);
4915 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
4916 }
4917#endif
4918
4919#ifdef IEMNATIVE_WITH_TLB_LOOKUP
4920 if (!TlbState.fSkip)
4921 {
4922 /* end of TlbMiss - Jump to the done label. */
4923 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
4924 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
4925
4926 /*
4927 * TlbLookup:
4928 */
4929 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
4930 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
4931 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
4932
4933 /*
4934 * Emit code to do the actual storing / fetching.
4935 */
4936 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
4937# ifdef VBOX_WITH_STATISTICS
4938 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
4939 enmOp == kIemNativeEmitMemOp_Store
4940                                              ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
4941                                              : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
4942# endif
4943 switch (enmOp)
4944 {
4945 case kIemNativeEmitMemOp_Store:
4946 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
4947 {
4948 switch (cbMem)
4949 {
4950 case 1:
4951 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
4952 break;
4953 case 2:
4954 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
4955 break;
4956 case 4:
4957 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
4958 break;
4959 case 8:
4960 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
4961 break;
4962 default:
4963 AssertFailed();
4964 }
4965 }
4966 else
4967 {
4968 switch (cbMem)
4969 {
4970 case 1:
4971 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
4972 idxRegMemResult, TlbState.idxReg1);
4973 break;
4974 case 2:
4975 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
4976 idxRegMemResult, TlbState.idxReg1);
4977 break;
4978 case 4:
4979 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
4980 idxRegMemResult, TlbState.idxReg1);
4981 break;
4982 case 8:
4983 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
4984 idxRegMemResult, TlbState.idxReg1);
4985 break;
4986 default:
4987 AssertFailed();
4988 }
4989 }
4990 break;
4991
4992 case kIemNativeEmitMemOp_Fetch:
4993 case kIemNativeEmitMemOp_Fetch_Zx_U16:
4994 case kIemNativeEmitMemOp_Fetch_Zx_U32:
4995 case kIemNativeEmitMemOp_Fetch_Zx_U64:
4996 switch (cbMem)
4997 {
4998 case 1:
4999 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5000 break;
5001 case 2:
5002 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5003 break;
5004 case 4:
5005 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5006 break;
5007 case 8:
5008 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5009 break;
5010 default:
5011 AssertFailed();
5012 }
5013 break;
5014
5015 case kIemNativeEmitMemOp_Fetch_Sx_U16:
5016 Assert(cbMem == 1);
5017 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5018 break;
5019
5020 case kIemNativeEmitMemOp_Fetch_Sx_U32:
5021 Assert(cbMem == 1 || cbMem == 2);
5022 if (cbMem == 1)
5023 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5024 else
5025 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5026 break;
5027
5028 case kIemNativeEmitMemOp_Fetch_Sx_U64:
5029 switch (cbMem)
5030 {
5031 case 1:
5032 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5033 break;
5034 case 2:
5035 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5036 break;
5037 case 4:
5038 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5039 break;
5040 default:
5041 AssertFailed();
5042 }
5043 break;
5044
5045 default:
5046 AssertFailed();
5047 }
5048
5049 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
5050
5051 /*
5052 * TlbDone:
5053 */
5054 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
5055
5056 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
5057
5058# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5059 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
5060 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5061# endif
5062 }
5063#else
5064 RT_NOREF(fAlignMask, idxLabelTlbMiss);
5065#endif
5066
5067 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
5068 iemNativeVarRegisterRelease(pReNative, idxVarValue);
5069 return off;
5070}
5071
5072
5073
5074/*********************************************************************************************************************************
5075* Memory fetches (IEM_MEM_FETCH_XXX). *
5076*********************************************************************************************************************************/
5077
5078/* 8-bit segmented: */
5079#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
5080 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
5081 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5082 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
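/* Call-site sketch (illustrative; u8Tmp and GCPtrEffSrc are just example
   variable names): inside a generated MC block the statement
       IEM_MC_FETCH_MEM_U8(u8Tmp, X86_SREG_DS, GCPtrEffSrc);
   thus expands into a call to the common worker above with cbMem=1, no
   alignment mask, kIemNativeEmitMemOp_Fetch and iemNativeHlpMemFetchDataU8
   as the TLB-miss fallback; pReNative, off and pCallEntry are in scope in
   those generated recompiler functions. */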
5083
5084#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5085 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5086 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5087 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5088
5089#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5090 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5091 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5092 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5093
5094#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5095 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5096 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5097 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5098
5099#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5100 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5101 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5102 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5103
5104#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5105 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5106 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5107 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5108
5109#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5110 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5111 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5112 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5113
5114/* 16-bit segmented: */
5115#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5116 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5117 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5118 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5119
5120#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5121 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5122 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5123 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5124
5125#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5126 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5127 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5128 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5129
5130#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5131 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5132 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5133 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5134
5135#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5136 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5137 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5138 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5139
5140#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5141 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5142 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5143 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5144
5145
5146/* 32-bit segmented: */
5147#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5148 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5149 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5150 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5151
5152#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5153 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5154 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5155 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5156
5157#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5158 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5159 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5160 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5161
5162#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5163 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5164 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5165 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5166
5167
5168/* 64-bit segmented: */
5169#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5170 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5171 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5172 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5173
5174
5175
5176/* 8-bit flat: */
5177#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
5178 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
5179 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5180 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5181
5182#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
5183 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5184 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5185 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5186
5187#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
5188 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5189 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5190 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5191
5192#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
5193 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5194 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5195 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5196
5197#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
5198 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5199 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5200 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5201
5202#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
5203 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5204 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5205 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5206
5207#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
5208 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5209 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5210 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5211
5212
5213/* 16-bit flat: */
5214#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
5215 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5216 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5217 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5218
5219#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
5220 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5221 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5222 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5223
5224#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
5225 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5226 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5227 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5228
5229#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
5230 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5231 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5232 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5233
5234#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
5235 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5236 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5237 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5238
5239#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
5240 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5241 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5242 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5243
5244/* 32-bit flat: */
5245#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
5246 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5247 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5248 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5249
5250#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
5251 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5252 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5253 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5254
5255#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
5256 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5257 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5258 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5259
5260#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
5261 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5262 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5263 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5264
5265/* 64-bit flat: */
5266#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
5267 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5268 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5269 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
5270
5271
5272
5273/*********************************************************************************************************************************
5274* Memory stores (IEM_MEM_STORE_XXX). *
5275*********************************************************************************************************************************/
5276
5277#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
5278 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
5279 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
5280 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
5281
5282#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
5283 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
5284 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
5285 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
5286
5287#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
5288 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
5289 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
5290 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
5291
5292#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
5293 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
5294 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
5295 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
5296
5297
5298#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
5299 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
5300 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
5301 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
5302
5303#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
5304 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
5305 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
5306 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
5307
5308#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
5309 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
5310 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
5311 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
5312
5313#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
5314 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
5315 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
5316 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
5317
5318
5319#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
5320 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
5321 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
5322
5323#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
5324 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
5325 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
5326
5327#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
5328 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
5329 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
5330
5331#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
5332 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
5333 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
5334
5335
5336#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
5337 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
5338 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
5339
5340#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
5341 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
5342 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
5343
5344#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
5345 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
5346 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
5347
5348#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
5349 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
5350 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
5351
5352/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
5353 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
5354DECL_INLINE_THROW(uint32_t)
5355iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
5356 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
5357{
5358 /*
5359 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
5360 * to do the grunt work.
5361 */
5362 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
5363 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
5364 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
5365 pfnFunction, idxInstr);
5366 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
5367 return off;
5368}
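/* Illustration (hypothetical arguments): IEM_MC_STORE_MEM_U16_CONST(X86_SREG_DS, GCPtrEff, 0x1234)
   ends up here with uValueConst=0x1234, iSegReg=X86_SREG_DS, cbMem=sizeof(uint16_t) and
   pfnFunction=iemNativeHlpMemStoreDataU16, i.e. the constant is wrapped in a temporary immediate
   variable and then stored exactly like a regular variable store. */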
5369
5370
5371
5372/*********************************************************************************************************************************
5373* Stack Accesses. *
5374*********************************************************************************************************************************/
5375/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
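/* Decoding example: RT_MAKE_U32_FROM_U8(32, 32, 1, 0), as used by IEM_MC_FLAT32_PUSH_U32_SREG below,
   packs cBitsVar=32 into the least significant byte (read back with RT_BYTE1 in the emitter),
   cBitsFlat=32 into the second byte (RT_BYTE2) and fSReg=1 into the third byte (RT_BYTE3);
   the fourth byte is unused. */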
5376#define IEM_MC_PUSH_U16(a_u16Value) \
5377 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
5378 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
5379#define IEM_MC_PUSH_U32(a_u32Value) \
5380 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
5381 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
5382#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
5383 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
5384 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
5385#define IEM_MC_PUSH_U64(a_u64Value) \
5386 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
5387 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
5388
5389#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
5390 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
5391 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
5392#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
5393 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
5394 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
5395#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
5396 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
5397 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
5398
5399#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
5400 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
5401 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
5402#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
5403 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
5404 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
5405
5406
5407DECL_FORCE_INLINE_THROW(uint32_t)
5408iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
5409{
5410 /* Use16BitSp: */
5411#ifdef RT_ARCH_AMD64
5412 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
5413 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
5414#else
5415 /* sub regeff, regrsp, #cbMem */
5416 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
5417 /* and regeff, regeff, #0xffff */
5418 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
5419 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
5420 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0, keeping the other RSP bits as is. */
5421 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
5422#endif
5423 return off;
5424}
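/* Worked example of the sequence above: with SP=0x0000 and cbMem=2 the effective store address
   becomes 0xFFFE (16-bit wrap-around) and only bits 15:0 of RSP are updated; bits 63:16 are left
   untouched on both code paths. */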
5425
5426
5427DECL_FORCE_INLINE(uint32_t)
5428iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
5429{
5430 /* Use32BitSp: */
5431 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
5432 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
5433 return off;
5434}
5435
5436
5437/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
5438DECL_INLINE_THROW(uint32_t)
5439iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
5440 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
5441{
5442 /*
5443 * Assert sanity.
5444 */
5445 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
5446 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
5447#ifdef VBOX_STRICT
5448 if (RT_BYTE2(cBitsVarAndFlat) != 0)
5449 {
5450 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
5451 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
5452 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
5453 Assert( pfnFunction
5454 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
5455 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
5456 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
5457 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
5458 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
5459 : UINT64_C(0xc000b000a0009000) ));
5460 }
5461 else
5462 Assert( pfnFunction
5463 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
5464 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
5465 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
5466 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
5467 : UINT64_C(0xc000b000a0009000) ));
5468#endif
5469
5470#ifdef VBOX_STRICT
5471 /*
5472 * Check that the fExec flags we've got make sense.
5473 */
5474 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
5475#endif
5476
5477 /*
5478 * To keep things simple we have to commit any pending writes first as we
5479 * may end up making calls.
5480 */
5481 /** @todo we could postpone this till we make the call and reload the
5482 * registers after returning from the call. Not sure if that's sensible or
5483 * not, though. */
5484 off = iemNativeRegFlushPendingWrites(pReNative, off);
5485
5486 /*
5487 * First we calculate the new RSP and the effective stack pointer value.
5488 * For 64-bit mode and flat 32-bit these two are the same.
5489 * (Code structure is very similar to that of PUSH)
5490 */
5491 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
5492 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
5493 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
5494 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
5495 ? cbMem : sizeof(uint16_t);
5496 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
5497 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
5498 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
5499 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
5500 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
5501 if (cBitsFlat != 0)
5502 {
5503 Assert(idxRegEffSp == idxRegRsp);
5504 Assert(cBitsFlat == 32 || cBitsFlat == 64);
5505 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
5506 if (cBitsFlat == 64)
5507 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
5508 else
5509 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
5510 }
5511 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
5512 {
5513 Assert(idxRegEffSp != idxRegRsp);
5514 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
5515 kIemNativeGstRegUse_ReadOnly);
5516#ifdef RT_ARCH_AMD64
5517 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5518#else
5519 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5520#endif
5521 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
5522 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
5523 offFixupJumpToUseOtherBitSp = off;
5524 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
5525 {
5526 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
5527 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
5528 }
5529 else
5530 {
5531 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
5532 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
5533 }
5534 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5535 }
5536 /* SpUpdateEnd: */
5537 uint32_t const offLabelSpUpdateEnd = off;
5538
5539 /*
5540 * Okay, now prepare for the TLB lookup and jump to the lookup code (or to TlbMiss if
5541 * we're skipping the lookup).
5542 */
5543 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
5544 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
5545 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
5546 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
5547 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
5548 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
5549 : UINT32_MAX;
5550 uint8_t const idxRegValue = !TlbState.fSkip
5551 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5552 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
5553 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
5554 : UINT8_MAX;
5555 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
5556
5557
5558 if (!TlbState.fSkip)
5559 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
5560 else
5561 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
5562
5563 /*
5564 * Use16BitSp:
5565 */
5566 if (cBitsFlat == 0)
5567 {
5568#ifdef RT_ARCH_AMD64
5569 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5570#else
5571 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5572#endif
5573 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
5574 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
5575 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
5576 else
5577 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
5578 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
5579 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5580 }
5581
5582 /*
5583 * TlbMiss:
5584 *
5585 * Call helper to do the pushing.
5586 */
5587 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
5588
5589#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5590 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5591#else
5592 RT_NOREF(idxInstr);
5593#endif
5594
5595 /* Save variables in volatile registers. */
5596 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
5597 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
5598 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
5599 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
5600 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
5601
5602 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
5603 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
5604 {
5605 /* Swap them using ARG0 as temp register: */
5606 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
5607 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
5608 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
5609 }
5610 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
5611 {
5612 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
5613 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
5614 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5615
5616 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
5617 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
5618 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
5619 }
5620 else
5621 {
5622 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
5623 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
5624
5625 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
5626 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
5627 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
5628 }
5629
5630 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
5631 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5632
5633 /* Done setting up parameters, make the call. */
5634 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
5635
5636 /* Restore variables and guest shadow registers to volatile registers. */
5637 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
5638 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
5639
5640#ifdef IEMNATIVE_WITH_TLB_LOOKUP
5641 if (!TlbState.fSkip)
5642 {
5643 /* end of TlbMiss - Jump to the done label. */
5644 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
5645 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
5646
5647 /*
5648 * TlbLookup:
5649 */
5650 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
5651 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
5652
5653 /*
5654 * Emit code to do the actual storing / fetching.
5655 */
5656 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
5657# ifdef VBOX_WITH_STATISTICS
5658 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
5659 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
5660# endif
5661 if (idxRegValue != UINT8_MAX)
5662 {
5663 switch (cbMemAccess)
5664 {
5665 case 2:
5666 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
5667 break;
5668 case 4:
5669 if (!fIsIntelSeg)
5670 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
5671 else
5672 {
5673 /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
5674 PUSH FS in real mode, so we have to try to emulate that here.
5675 We borrow the now unused idxReg1 from the TLB lookup code here. */
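/* Illustration (hypothetical selector value): pushing FS=0x0010 this way stores 0x0010 in
   bits 15:0 of the 32-bit slot and the masked upper half of EFLAGS in bits 31:16, mirroring
   what the CPU mentioned in the comment above does for a real-mode 32-bit segment push. */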
5676 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
5677 kIemNativeGstReg_EFlags);
5678 if (idxRegEfl != UINT8_MAX)
5679 {
5680#ifdef RT_ARCH_AMD64
5681 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
5682 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
5683 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
5684#else
5685 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
5686 off, TlbState.idxReg1, idxRegEfl,
5687 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
5688#endif
5689 iemNativeRegFreeTmp(pReNative, idxRegEfl);
5690 }
5691 else
5692 {
5693 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
5694 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5695 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
5696 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
5697 }
5698 /* ASSUMES the upper half of idxRegValue is ZERO. */
5699 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
5700 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
5701 }
5702 break;
5703 case 8:
5704 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
5705 break;
5706 default:
5707 AssertFailed();
5708 }
5709 }
5710 else
5711 {
5712 switch (cbMemAccess)
5713 {
5714 case 2:
5715 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
5716 idxRegMemResult, TlbState.idxReg1);
5717 break;
5718 case 4:
5719 Assert(!fIsSegReg);
5720 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
5721 idxRegMemResult, TlbState.idxReg1);
5722 break;
5723 case 8:
5724 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
5725 break;
5726 default:
5727 AssertFailed();
5728 }
5729 }
5730
5731 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
5732 TlbState.freeRegsAndReleaseVars(pReNative);
5733
5734 /*
5735 * TlbDone:
5736 *
5737 * Commit the new RSP value.
5738 */
5739 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
5740 }
5741#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
5742
5743 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
5744 iemNativeRegFreeTmp(pReNative, idxRegRsp);
5745 if (idxRegEffSp != idxRegRsp)
5746 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
5747
5748 /* The value variable is implicitly flushed. */
5749 if (idxRegValue != UINT8_MAX)
5750 iemNativeVarRegisterRelease(pReNative, idxVarValue);
5751 iemNativeVarFreeLocal(pReNative, idxVarValue);
5752
5753 return off;
5754}
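/* At run time the code emitted by iemNativeEmitStackPush roughly flows as follows when the TLB
   lookup isn't skipped: SP/EffSp update (16/32/64-bit variants) -> TlbLookup -> inline store on a
   TLB hit -> TlbDone, with TlbMiss diverting to the pfnFunction helper call and rejoining at
   TlbDone, after which the new RSP value is committed to cpum.GstCtx.rsp. When the lookup is
   skipped, everything goes through the helper call instead. */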
5755
5756
5757
5758/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
5759#define IEM_MC_POP_GREG_U16(a_iGReg) \
5760 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
5761 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
5762#define IEM_MC_POP_GREG_U32(a_iGReg) \
5763 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
5764 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
5765#define IEM_MC_POP_GREG_U64(a_iGReg) \
5766 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
5767 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
5768
5769#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
5770 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
5771 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
5772#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
5773 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
5774 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
5775
5776#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
5777 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
5778 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
5779#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
5780 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
5781 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
5782
5783
5784DECL_FORCE_INLINE_THROW(uint32_t)
5785iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
5786 uint8_t idxRegTmp)
5787{
5788 /* Use16BitSp: */
5789#ifdef RT_ARCH_AMD64
5790 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
5791 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
5792 RT_NOREF(idxRegTmp);
5793#else
5794 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
5795 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
5796 /* add tmp, regrsp, #cbMem */
5797 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
5798 /* and tmp, tmp, #0xffff */
5799 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
5800 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
5801 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
5802 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
5803#endif
5804 return off;
5805}
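/* Worked example of the sequence above: with SP=0xFFFE and cbMem=2 the value is read from offset
   0xFFFE and the new SP becomes 0x0000 (16-bit wrap-around); bits 63:16 of RSP are preserved. */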
5806
5807
5808DECL_FORCE_INLINE(uint32_t)
5809iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
5810{
5811 /* Use32BitSp: */
5812 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
5813 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
5814 return off;
5815}
5816
5817
5818/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
5819DECL_INLINE_THROW(uint32_t)
5820iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
5821 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
5822{
5823 /*
5824 * Assert sanity.
5825 */
5826 Assert(idxGReg < 16);
5827#ifdef VBOX_STRICT
5828 if (RT_BYTE2(cBitsVarAndFlat) != 0)
5829 {
5830 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
5831 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
5832 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
5833 Assert( pfnFunction
5834 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
5835 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
5836 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
5837 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
5838 : UINT64_C(0xc000b000a0009000) ));
5839 }
5840 else
5841 Assert( pfnFunction
5842 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
5843 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
5844 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
5845 : UINT64_C(0xc000b000a0009000) ));
5846#endif
5847
5848#ifdef VBOX_STRICT
5849 /*
5850 * Check that the fExec flags we've got make sense.
5851 */
5852 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
5853#endif
5854
5855 /*
5856 * To keep things simple we have to commit any pending writes first as we
5857 * may end up making calls.
5858 */
5859 off = iemNativeRegFlushPendingWrites(pReNative, off);
5860
5861 /*
5862 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
5863 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
5864 * directly as the effective stack pointer.
5865 * (Code structure is very similar to that of PUSH)
5866 */
5867 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
5868 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
5869 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
5870 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
5871 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
5872 /** @todo can do a better job picking the register here. For cbMem >= 4 this
5873 * will be the resulting register value. */
5874 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
5875
5876 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
5877 if (cBitsFlat != 0)
5878 {
5879 Assert(idxRegEffSp == idxRegRsp);
5880 Assert(cBitsFlat == 32 || cBitsFlat == 64);
5881 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
5882 }
5883 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
5884 {
5885 Assert(idxRegEffSp != idxRegRsp);
5886 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
5887 kIemNativeGstRegUse_ReadOnly);
5888#ifdef RT_ARCH_AMD64
5889 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5890#else
5891 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5892#endif
5893 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
5894 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
5895 offFixupJumpToUseOtherBitSp = off;
5896 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
5897 {
5898/** @todo can skip idxRegRsp updating when popping ESP. */
5899 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
5900 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
5901 }
5902 else
5903 {
5904 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
5905 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
5906 }
5907 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5908 }
5909 /* SpUpdateEnd: */
5910 uint32_t const offLabelSpUpdateEnd = off;
5911
5912 /*
5913 * Okay, now prepare for the TLB lookup and jump to the lookup code (or to TlbMiss if
5914 * we're skipping the lookup).
5915 */
5916 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
5917 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
5918 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
5919 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
5920 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
5921 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
5922 : UINT32_MAX;
5923
5924 if (!TlbState.fSkip)
5925 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
5926 else
5927 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
5928
5929 /*
5930 * Use16BitSp:
5931 */
5932 if (cBitsFlat == 0)
5933 {
5934#ifdef RT_ARCH_AMD64
5935 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5936#else
5937 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5938#endif
5939 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
5940 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
5941 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
5942 else
5943 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
5944 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
5945 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5946 }
5947
5948 /*
5949 * TlbMiss:
5950 *
5951 * Call helper to do the popping.
5952 */
5953 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
5954
5955#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5956 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5957#else
5958 RT_NOREF(idxInstr);
5959#endif
5960
5961 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
5962 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
5963 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
5964 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
5965
5966
5967 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
5968 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
5969 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
5970
5971 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
5972 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5973
5974 /* Done setting up parameters, make the call. */
5975 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
5976
5977 /* Move the return register content to idxRegMemResult. */
5978 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
5979 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
5980
5981 /* Restore variables and guest shadow registers to volatile registers. */
5982 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
5983 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
5984
5985#ifdef IEMNATIVE_WITH_TLB_LOOKUP
5986 if (!TlbState.fSkip)
5987 {
5988 /* end of TlbMiss - Jump to the done label. */
5989 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
5990 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
5991
5992 /*
5993 * TlbLookup:
5994 */
5995 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
5996 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
5997
5998 /*
5999 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
6000 */
6001 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6002# ifdef VBOX_WITH_STATISTICS
6003 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6004 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
6005# endif
6006 switch (cbMem)
6007 {
6008 case 2:
6009 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6010 break;
6011 case 4:
6012 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6013 break;
6014 case 8:
6015 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6016 break;
6017 default:
6018 AssertFailed();
6019 }
6020
6021 TlbState.freeRegsAndReleaseVars(pReNative);
6022
6023 /*
6024 * TlbDone:
6025 *
6026 * Set the new RSP value (FLAT accesses need to calculate it first) and
6027 * commit the popped register value.
6028 */
6029 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6030 }
6031#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
6032
6033 if (idxGReg != X86_GREG_xSP)
6034 {
6035 /* Set the register. */
6036 if (cbMem >= sizeof(uint32_t))
6037 {
6038#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6039 AssertMsg( pReNative->idxCurCall == 0
6040 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
6041 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
6042#endif
6043 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
6044 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
6045 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6046 }
6047 else
6048 {
6049 Assert(cbMem == sizeof(uint16_t));
6050 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
6051 kIemNativeGstRegUse_ForUpdate);
6052 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
6053 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6054 iemNativeRegFreeTmp(pReNative, idxRegDst);
6055 }
6056
6057 /* Complete RSP calculation for FLAT mode. */
6058 if (idxRegEffSp == idxRegRsp)
6059 {
6060 if (cBitsFlat == 64)
6061 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
6062 else
6063 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
6064 }
6065 }
6066 else
6067 {
6068 /* We're popping RSP, ESP or SP. Only this one needs a bit of extra work, of course. */
6069 if (cbMem == sizeof(uint64_t))
6070 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
6071 else if (cbMem == sizeof(uint32_t))
6072 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
6073 else
6074 {
6075 if (idxRegEffSp == idxRegRsp)
6076 {
6077 if (cBitsFlat == 64)
6078 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
6079 else
6080 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
6081 }
6082 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
6083 }
6084 }
6085 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
6086
6087 iemNativeRegFreeTmp(pReNative, idxRegRsp);
6088 if (idxRegEffSp != idxRegRsp)
6089 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
6090 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6091
6092 return off;
6093}
6094
6095
6096
6097/*********************************************************************************************************************************
6098* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
6099*********************************************************************************************************************************/
6100
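/* All of the mappers below follow the same pattern: they pass the access flags, an alignment mask
   (normally cbMem - 1, 0 for byte accesses) and the matching iemNativeHlpMemMapDataXxx or
   iemNativeHlpMemFlatMapDataXxx helper to iemNativeEmitMemMapCommon; the FLAT variants simply use
   UINT8_MAX as the segment register index. */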
6101#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6102 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6103 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
6104 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
6105
6106#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6107 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6108 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
6109 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
6110
6111#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6112 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6113 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
6114 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
6115
6116#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6117 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6118 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
6119 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
6120
6121
6122#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6123 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6124 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6125 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
6126
6127#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6128 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6129 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6130 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
6131
6132#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6133 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6134 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6135 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
6136
6137#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6138 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6139 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6140 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
6141
6142#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6143 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
6144 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6145 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
6146
6147
6148#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6149 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6150 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6151 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
6152
6153#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6154 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6155 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6156 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
6157
6158#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6159 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6160 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6161 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
6162
6163#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6164 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6165 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6166 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
6167
6168#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6169 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
6170 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6171 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
6172
6173
6174#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6175 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6176 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6177 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
6178
6179#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6180 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6181 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6182 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
6183#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6184 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6185 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6186 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
6187
6188#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6189 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6190 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6191 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
6192
6193#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6194 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
6195 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6196 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
6197
6198
6199#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6200 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
6201 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6202 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
6203
6204#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6205 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
6206 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
6207 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
6208
6209
6210#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6211 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6212 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6213 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
6214
6215#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6216 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6217 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6218 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
6219
6220#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6221 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6222 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6223 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
6224
6225#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6226 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6227 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6228 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
6229
6230
6231
6232#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6233 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6234 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
6235 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
6236
6237#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6238 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6239 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
6240 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
6241
6242#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6243 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6244 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
6245 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
6246
6247#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6248 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6249 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
6250 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
6251
6252
6253#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
6254 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6255 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6256 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
6257
6258#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
6259 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6260 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6261 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
6262
6263#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
6264 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6265 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6266 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
6267
6268#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
6269 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6270 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6271 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
6272
6273#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
6274 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
6275 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6276 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
6277
6278
6279#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
6280 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6281 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6282 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
6283
6284#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
6285 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6286 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6287 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
6288
6289#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
6290 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6291 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6292 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
6293
6294#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
6295 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6296 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6297 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
6298
6299#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
6300 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
6301 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6302 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
6303
6304
6305#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
6306 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
6307 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6308 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
6309
6310#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
6311 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
6312 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6313 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
6314
6315#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
6316 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
6317 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6318 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
6319
6320#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
6321 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
6322 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6323 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
6324
6325#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
6326 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
6327 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6328 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
6329
6330
6331#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
6332 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
6333 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6334 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
6335
6336#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
6337 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
6338 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
6339 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
6340
6341
6342#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
6343 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
6344 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6345 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
6346
6347#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
6348 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
6349 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6350 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
6351
6352#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
6353 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
6354 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6355 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
6356
6357#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
6358 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
6359 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6360 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
6361
6362
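/**
 * Common worker for the IEM_MC_MEM_MAP_XXX and IEM_MC_MEM_FLAT_MAP_XXX MCs above.
 *
 * Emits an inline TLB lookup for the hit case and a call to the given mapping
 * helper (pfnFunction) for the miss case.  The host address of the mapping ends
 * up in the variable behind idxVarMem and the unmap info in the variable behind
 * idxVarUnmapInfo (zero on the TLB-hit path).  The flat variants pass iSegReg
 * as UINT8_MAX.
 */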
6363DECL_INLINE_THROW(uint32_t)
6364iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
6365 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
6366 uintptr_t pfnFunction, uint8_t idxInstr)
6367{
6368 /*
6369 * Assert sanity.
6370 */
6371 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
6372 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
6373 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
6374 && pVarMem->cbVar == sizeof(void *),
6375 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6376
6377 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
6378 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
6379 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
6380 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
6381 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6382
6383 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
6384 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
6385 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
6386 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
6387 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6388
6389 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
6390
6391 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6392
6393#ifdef VBOX_STRICT
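/* Helpers for picking the expected mapping function from the access flags, so
   we can assert that pfnFunction matches what the MC is supposed to pass in. */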
6394# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
6395 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
6396 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
6397 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
6398 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
6399# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
6400 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
6401 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
6402 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
6403
6404 if (iSegReg == UINT8_MAX)
6405 {
6406 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6407 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6408 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6409 switch (cbMem)
6410 {
6411 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
6412 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
6413 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
6414 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
6415 case 10:
6416 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
6417 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
6418 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
6419 break;
6420 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
6421# if 0
6422 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
6423 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
6424# endif
6425 default: AssertFailed(); break;
6426 }
6427 }
6428 else
6429 {
6430 Assert(iSegReg < 6);
6431 switch (cbMem)
6432 {
6433 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
6434 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
6435 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
6436 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
6437 case 10:
6438 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
6439 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
6440 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
6441 break;
6442 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
6443# if 0
6444 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
6445 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
6446# endif
6447 default: AssertFailed(); break;
6448 }
6449 }
6450# undef IEM_MAP_HLP_FN
6451# undef IEM_MAP_HLP_FN_NO_AT
6452#endif
6453
6454#ifdef VBOX_STRICT
6455 /*
6456 * Check that the fExec flags we've got make sense.
6457 */
6458 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6459#endif
6460
6461 /*
6462 * To keep things simple we have to commit any pending writes first as we
6463 * may end up making calls.
6464 */
6465 off = iemNativeRegFlushPendingWrites(pReNative, off);
6466
6467#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6468 /*
6469 * Move/spill/flush stuff out of call-volatile registers.
6470 * This is the easy way out. We could contain this to the tlb-miss branch
6471 * by saving and restoring active stuff here.
6472 */
6473 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
6474 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
6475#endif
6476
6477 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
6478 while the tlb-miss codepath will temporarily put it on the stack.
6479       Set the type to stack here so we don't need to do it twice below. */
6480 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
6481 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
6482 /** @todo use a tmp register from TlbState, since they'll be free after tlb
6483 * lookup is done. */
6484
6485 /*
6486 * Define labels and allocate the result register (trying for the return
6487 * register if we can).
6488 */
6489 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6490 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6491 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
6492 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
6493 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
6494 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6495 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6496 : UINT32_MAX;
6497//off=iemNativeEmitBrk(pReNative, off, 0);
6498 /*
6499 * Jump to the TLB lookup code.
6500 */
6501 if (!TlbState.fSkip)
6502 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6503
6504 /*
6505 * TlbMiss:
6506 *
6507     * Call helper to do the mapping.
6508 * We flush all guest register shadow copies here.
6509 */
6510 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
6511
6512#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6513 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6514#else
6515 RT_NOREF(idxInstr);
6516#endif
6517
6518#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6519 /* Save variables in volatile registers. */
6520 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
6521 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6522#endif
6523
6524 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
6525 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
6526#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6527 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
6528#else
6529 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6530#endif
6531
6532 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
6533 if (iSegReg != UINT8_MAX)
6534 {
6535 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
6536 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
6537 }
6538
6539 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
6540 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
6541 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
6542
6543 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6544 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6545
6546 /* Done setting up parameters, make the call. */
6547 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6548
6549 /*
6550 * Put the output in the right registers.
6551 */
6552 Assert(idxRegMemResult == pVarMem->idxReg);
6553 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
6554 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
6555
6556#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6557 /* Restore variables and guest shadow registers to volatile registers. */
6558 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6559 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6560#endif
6561
6562 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
6563 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
6564
6565#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6566 if (!TlbState.fSkip)
6567 {
6568        /* end of tlb miss - Jump to the done label. */
6569 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6570 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6571
6572 /*
6573 * TlbLookup:
6574 */
6575 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
6576 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6577# ifdef VBOX_WITH_STATISTICS
6578 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
6579 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
6580# endif
6581
6582 /* [idxVarUnmapInfo] = 0; */
6583 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
6584
6585 /*
6586 * TlbDone:
6587 */
6588 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6589
6590 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
6591
6592# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6593 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
6594 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6595# endif
6596 }
6597#else
6598 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
6599#endif
6600
6601 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
6602 iemNativeVarRegisterRelease(pReNative, idxVarMem);
6603
6604 return off;
6605}
6606
6607
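/* Commit-and-unmap MCs - all variants share iemNativeEmitMemCommitAndUnmap below
   and differ only in the access flags and the unmap helper that gets called. */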
6608#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
6609 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
6610 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
6611
6612#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
6613 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
6614 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
6615
6616#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
6617 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
6618 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
6619
6620#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
6621 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
6622 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
6623
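/**
 * Common worker for the IEM_MC_MEM_COMMIT_AND_UNMAP_XXX MCs above.
 *
 * Only calls the given unmap helper (pfnFunction) if the bUnmapInfo variable is
 * non-zero; the TLB-hit path of the mapping code stores zero there, in which
 * case no helper call is needed.
 */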
6624DECL_INLINE_THROW(uint32_t)
6625iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
6626 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
6627{
6628 /*
6629 * Assert sanity.
6630 */
6631 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
6632#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
6633 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
6634#endif
6635 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
6636 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
6637 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
6638#ifdef VBOX_STRICT
6639 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
6640 {
6641 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
6642 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
6643 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
6644 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
6645 case IEM_ACCESS_TYPE_WRITE:
6646 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
6647 case IEM_ACCESS_TYPE_READ:
6648 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
6649 default: AssertFailed();
6650 }
6651#else
6652 RT_NOREF(fAccess);
6653#endif
6654
6655 /*
6656 * To keep things simple we have to commit any pending writes first as we
6657 * may end up making calls (there shouldn't be any at this point, so this
6658 * is just for consistency).
6659 */
6660 /** @todo we could postpone this till we make the call and reload the
6661 * registers after returning from the call. Not sure if that's sensible or
6662 * not, though. */
6663 off = iemNativeRegFlushPendingWrites(pReNative, off);
6664
6665 /*
6666 * Move/spill/flush stuff out of call-volatile registers.
6667 *
6668 * We exclude any register holding the bUnmapInfo variable, as we'll be
6669 * checking it after returning from the call and will free it afterwards.
6670 */
6671 /** @todo save+restore active registers and maybe guest shadows in miss
6672 * scenario. */
6673 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
6674 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
6675
6676 /*
6677 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
6678 * to call the unmap helper function.
6679 *
6680     * The likelihood of it being zero is higher than for the TLB hit when doing
6681     * the mapping, as a TLB miss for a well aligned and unproblematic memory
6682 * access should also end up with a mapping that won't need special unmapping.
6683 */
6684 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
6685 * should speed up things for the pure interpreter as well when TLBs
6686 * are enabled. */
6687#ifdef RT_ARCH_AMD64
6688 if (pVarUnmapInfo->idxReg == UINT8_MAX)
6689 {
6690 /* test byte [rbp - xxx], 0ffh */
6691 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6692 pbCodeBuf[off++] = 0xf6;
6693 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
6694 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
6695 pbCodeBuf[off++] = 0xff;
6696 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6697 }
6698 else
6699#endif
6700 {
6701 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
6702 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
6703 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
6704 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
6705 }
6706 uint32_t const offJmpFixup = off;
6707    off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
6708
6709 /*
6710 * Call the unmap helper function.
6711 */
6712#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
6713 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6714#else
6715 RT_NOREF(idxInstr);
6716#endif
6717
6718 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
6719 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
6720 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6721
6722 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6723 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6724
6725 /* Done setting up parameters, make the call. */
6726 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6727
6728    /* The bUnmapInfo variable is implicitly freed by these MCs. */
6729 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
6730
6731 /*
6732 * Done, just fixup the jump for the non-call case.
6733 */
6734 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
6735
6736 return off;
6737}
6738
6739
6740
6741/*********************************************************************************************************************************
6742* State and Exceptions *
6743*********************************************************************************************************************************/
6744
6745#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
6746#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
6747
6748#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
6749#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
6750#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
6751
6752#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
6753#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
6754#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
6755
6756
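/** Common worker for the IEM_MC_ACTUALIZE_xxx_STATE_FOR_READ/CHANGE and
 *  IEM_MC_PREPARE_SSE/AVX_USAGE MCs above; currently a no-op placeholder. */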
6757DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
6758{
6759 /** @todo this needs a lot more work later. */
6760 RT_NOREF(pReNative, fForChange);
6761 return off;
6762}
6763
6764
6765
6766/*********************************************************************************************************************************
6767* Emitters for FPU related operations. *
6768*********************************************************************************************************************************/
6769
6770#define IEM_MC_FETCH_FCW(a_u16Fcw) \
6771 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
6772
6773/** Emits code for IEM_MC_FETCH_FCW. */
6774DECL_INLINE_THROW(uint32_t)
6775iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
6776{
6777 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6778 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
6779
6780 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6781
6782 /* Allocate a temporary FCW register. */
6783 /** @todo eliminate extra register */
6784 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
6785 kIemNativeGstRegUse_ReadOnly);
6786
6787 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
6788
6789 /* Free but don't flush the FCW register. */
6790 iemNativeRegFreeTmp(pReNative, idxFcwReg);
6791 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6792
6793 return off;
6794}
6795
6796
6797#define IEM_MC_FETCH_FSW(a_u16Fsw) \
6798 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
6799
6800/** Emits code for IEM_MC_FETCH_FSW. */
6801DECL_INLINE_THROW(uint32_t)
6802iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
6803{
6804 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6805 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
6806
6807 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
6808 /* Allocate a temporary FSW register. */
6809 /** @todo eliminate extra register */
6810 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
6811 kIemNativeGstRegUse_ReadOnly);
6812
6813 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
6814
6815 /* Free but don't flush the FSW register. */
6816 iemNativeRegFreeTmp(pReNative, idxFswReg);
6817 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6818
6819 return off;
6820}
6821
6822
6823
6824#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6825
6826
6827/*********************************************************************************************************************************
6828* Emitters for SSE/AVX specific operations. *
6829*********************************************************************************************************************************/
6830
6831#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
6832 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
6833
6834/** Emits code for IEM_MC_COPY_XREG_U128. */
6835DECL_INLINE_THROW(uint32_t)
6836iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
6837{
6838 /* Allocate destination and source register. */
6839 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
6840 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
6841 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
6842 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
6843
6844 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
6845 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXRegDst);
6846 /* We don't need to write everything back here as the destination is marked as dirty and will be flushed automatically. */
6847
6848 /* Free but don't flush the source and destination register. */
6849 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
6850 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
6851
6852 return off;
6853}
6854
6855
6856#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
6857 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
6858
6859/** Emits code for IEM_MC_FETCH_XREG_U64. */
6860DECL_INLINE_THROW(uint32_t)
6861iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
6862{
6863 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6864 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
6865
6866 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
6867 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
6868
6869 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6870 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6871
6872 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
6873
6874 /* Free but don't flush the source register. */
6875 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
6876 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6877
6878 return off;
6879}
6880
6881
6882#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
6883    off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
6884
6885/** Emits code for IEM_MC_FETCH_XREG_U32. */
6886DECL_INLINE_THROW(uint32_t)
6887iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
6888{
6889 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6890 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
6891
6892 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
6893 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
6894
6895 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6896 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6897
6898 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
6899
6900 /* Free but don't flush the source register. */
6901 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
6902 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6903
6904 return off;
6905}
6906
6907
6908#define IEM_MC_FETCH_XREG_U16(a_u16Value, a_iXReg, a_iWord) \
6909    off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u16Value, a_iXReg, a_iWord)
6910
6911/** Emits code for IEM_MC_FETCH_XREG_U16. */
6912DECL_INLINE_THROW(uint32_t)
6913iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
6914{
6915 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6916 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
6917
6918 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
6919 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
6920
6921 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6922 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6923
6924 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
6925
6926 /* Free but don't flush the source register. */
6927 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
6928 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6929
6930 return off;
6931}
6932
6933
6934#define IEM_MC_FETCH_XREG_U8(a_u8Value, a_iXReg, a_iByte) \
6935    off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u8Value, a_iXReg, a_iByte)
6936
6937/** Emits code for IEM_MC_FETCH_XREG_U8. */
6938DECL_INLINE_THROW(uint32_t)
6939iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
6940{
6941 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6942 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
6943
6944 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
6945 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
6946
6947 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6948 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6949
6950 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
6951
6952 /* Free but don't flush the source register. */
6953 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
6954 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6955
6956 return off;
6957}
6958
6959
6960#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
6961 off = iemNativeEmitSimdStoreXregU64(pReNative, off, a_iXReg, a_u64Value, a_iQWord)
6962
6963/** Emits code for IEM_MC_STORE_XREG_U64. */
6964DECL_INLINE_THROW(uint32_t)
6965iemNativeEmitSimdStoreXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar, uint8_t iQWord)
6966{
6967 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6968 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
6969
6970 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
6971 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
6972
6973 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6974
6975 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQWord);
6976 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
6977
6978 /* Free but don't flush the source register. */
6979 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
6980 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6981
6982 return off;
6983}
6984
6985
6986#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
6987 off = iemNativeEmitSimdStoreXregU32(pReNative, off, a_iXReg, a_u32Value, a_iDWord)
6988
6989/** Emits code for IEM_MC_STORE_XREG_U32. */
6990DECL_INLINE_THROW(uint32_t)
6991iemNativeEmitSimdStoreXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar, uint8_t iDWord)
6992{
6993 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6994 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
6995
6996 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
6997 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
6998
6999 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7000
7001 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iDWord);
7002 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7003
7004 /* Free but don't flush the source register. */
7005 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7006 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7007
7008 return off;
7009}
7010
7011
7012#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
7013 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
7014
7015/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
7016DECL_INLINE_THROW(uint32_t)
7017iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7018{
7019 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7020 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7021
7022 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7023 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7024
7025 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7026
7027    /* Zero the vector register first, then store the 64-bit value into the low 64 bits. */
7028 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
7029 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
7030 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7031
7032 /* Free but don't flush the source register. */
7033 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7034 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7035
7036 return off;
7037}
7038
7039
7040#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
7041 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
7042
7043/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
7044DECL_INLINE_THROW(uint32_t)
7045iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7046{
7047 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7048 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7049
7050 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7051 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7052
7053 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7054
7055 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
7056 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
7057 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
7058 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7059
7060 /* Free but don't flush the source register. */
7061 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7062 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7063
7064 return off;
7065}
7066
7067
7068#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
7069 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
7070
7071/** Emits code for IEM_MC_FETCH_YREG_U64. */
7072DECL_INLINE_THROW(uint32_t)
7073iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
7074{
7075 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7076 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7077
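    /* Qwords 0 and 1 live in the low 128 bits of the YMM register, qwords 2 and 3 in the high 128 bits. */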
7078 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7079 iQWord >= 2
7080 ? kIemNativeGstSimdRegLdStSz_High128
7081 : kIemNativeGstSimdRegLdStSz_Low128,
7082 kIemNativeGstRegUse_ReadOnly);
7083
7084 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7085 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7086
7087 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
7088
7089 /* Free but don't flush the source register. */
7090 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7091 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7092
7093 return off;
7094}
7095
7096
7097#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
7098 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
7099
7100/** Emits code for IEM_MC_FETCH_YREG_U32. */
7101DECL_INLINE_THROW(uint32_t)
7102iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
7103{
7104 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7105 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7106
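    /* Dwords 0 thru 3 live in the low 128 bits of the YMM register, dwords 4 thru 7 in the high 128 bits. */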
7107 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7108 iDWord >= 4
7109 ? kIemNativeGstSimdRegLdStSz_High128
7110 : kIemNativeGstSimdRegLdStSz_Low128,
7111 kIemNativeGstRegUse_ReadOnly);
7112
7113 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7114 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7115
7116 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
7117
7118 /* Free but don't flush the source register. */
7119 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7120 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7121
7122 return off;
7123}
7124
7125
7126#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
7127 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
7128
7129/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
7130DECL_INLINE_THROW(uint32_t)
7131iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
7132{
7133 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7134 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
7135
7136 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
7137 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
7138
7139 /* Free but don't flush the register. */
7140 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
7141
7142 return off;
7143}
7144
7145
7146#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
7147 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
7148
7149/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
7150DECL_INLINE_THROW(uint32_t)
7151iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7152{
7153 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7154 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
7155
7156 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7157 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7158
7159 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7160
7161 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
7162 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7163 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7164 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iXReg);
7165
7166 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7167 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7168
7169 return off;
7170}
7171
7172
7173#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
7174 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
7175
7176/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
7177DECL_INLINE_THROW(uint32_t)
7178iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7179{
7180 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7181 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
7182
7183 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7184 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7185
7186 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7187
7188 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
7189 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7190 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7191 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iXReg);
7192
7193 /* Free but don't flush the source register. */
7194 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7195 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7196
7197 return off;
7198}
7199
7200
7201#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
7202 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
7203
7204/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
7205DECL_INLINE_THROW(uint32_t)
7206iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7207{
7208 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7209 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
7210
7211 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7212 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7213
7214 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7215
7216 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
7217 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7218 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7219 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iXReg);
7220
7221 /* Free but don't flush the source register. */
7222 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7223 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7224
7225 return off;
7226}
7227
7228
7229#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
7230 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
7231
7232/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
7233DECL_INLINE_THROW(uint32_t)
7234iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7235{
7236 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7237 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
7238
7239 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7240 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7241
7242 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7243
7244 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
7245 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7246 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7247 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iXReg);
7248
7249 /* Free but don't flush the source register. */
7250 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7251 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7252
7253 return off;
7254}
7255
7256
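/* The YREG broadcast variants below fill the whole 256-bit register (f256Bit=true),
   whereas the XREG ones above only fill the low 128 bits and zero the high half. */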
7257#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
7258 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
7259
7260/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
7261DECL_INLINE_THROW(uint32_t)
7262iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
7263{
7264 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7265 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
7266
7267 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7268 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7269
7270 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7271
7272 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
7273 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
7274 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
7275
7276 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7277 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7278
7279 return off;
7280}
7281
7282
7283#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
7284 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
7285
7286/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
7287DECL_INLINE_THROW(uint32_t)
7288iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
7289{
7290 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7291 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
7292
7293 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7294 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7295
7296 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7297
7298 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
7299 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
7300 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
7301
7302 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7303 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7304
7305 return off;
7306}
7307
7308
7309#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
7310 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
7311
7312/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
7313DECL_INLINE_THROW(uint32_t)
7314iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
7315{
7316 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7317 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
7318
7319 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7320 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7321
7322 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7323
7324 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
7325 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
7326 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
7327
7328 /* Free but don't flush the source register. */
7329 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7330 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7331
7332 return off;
7333}
7334
7335
7336#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
7337 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
7338
7339/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
7340DECL_INLINE_THROW(uint32_t)
7341iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
7342{
7343 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7344 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
7345
7346 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7347 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7348
7349 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7350
7351 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
7352 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
7353 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
7354
7355 /* Free but don't flush the source register. */
7356 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7357 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7358
7359 return off;
7360}
7361
7362
7363#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
7364 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
7365
7366/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
7367DECL_INLINE_THROW(uint32_t)
7368iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
7369{
7370 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7371 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
7372
7373 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7374 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7375
7376 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7377
7378 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
7379 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
7380 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
7381 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
7382
7383 /* Free but don't flush the source register. */
7384 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7385 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7386
7387 return off;
7388}
7389
7390
7391#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
7392 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
7393
7394/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
7395DECL_INLINE_THROW(uint32_t)
7396iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
7397{
7398 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7399 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
7400
7401 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7402 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7403
7404 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7405
7406 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
7407 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
7408 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
7409 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
7410
7411 /* Free but don't flush the source register. */
7412 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7413 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7414
7415 return off;
7416}
7417
7418
7419#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
7420 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
7421
7422/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
7423DECL_INLINE_THROW(uint32_t)
7424iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
7425{
7426 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7427 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
7428
7429 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
7430 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7431 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
7432 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7433 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7434
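    /* Copy the low 128 bits of the Hx source, overwrite qword 0 with the local value and zero the high half. */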
7435 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
7436 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
7437 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7438 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYRegDst);
7439 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYRegDst);
7440
7441 /* Free but don't flush the source and destination registers. */
7442 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
7443 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7444 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7445
7446 return off;
7447}
7448
7449
7450#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
7451 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
7452
7453/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
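/* The sequence below yields: dst.qw[0] = qw[0] of a_iYRegSrcHx, dst.qw[1] = the 64-bit
 * local value, and bits 255:128 of the destination cleared (VLMAX zero extension). */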
7454DECL_INLINE_THROW(uint32_t)
7455iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
7456{
7457 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7458 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
7459
7460 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
7461 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7462 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
7463 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7464 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7465
7466 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
7467 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
7468 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7469 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYRegDst);
7470 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYRegDst);
7471
7472 /* Free but don't flush the source and destination registers. */
7473 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
7474 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7475 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7476
7477 return off;
7478}
7479
7480
7481#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
7482 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
7483
7484
7485/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
7486DECL_INLINE_THROW(uint32_t)
7487iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
7488{
7489 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7490 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7491
7492 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
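    /* For example, a mask of 0x0f selects all four dwords and could be folded into a single
       zeroing of the low 128 bits; the code below simply emits one zeroing per selected dword. */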
7493 if (bImm8Mask & RT_BIT(0))
7494 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
7495 if (bImm8Mask & RT_BIT(1))
7496 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
7497 if (bImm8Mask & RT_BIT(2))
7498 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
7499 if (bImm8Mask & RT_BIT(3))
7500 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
7501 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7502
7503 /* Free but don't flush the destination register. */
7504 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7505
7506 return off;
7507}
7508
7509
7510
7511/*********************************************************************************************************************************
7512* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
7513*********************************************************************************************************************************/
7514
7515/**
7516 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX.
7517 */
7518DECL_INLINE_THROW(uint32_t)
7519iemNativeEmitCallSseAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
7520{
7521 /*
7522 * Need to do the FPU preparation.
7523 */
7524 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
7525
7526 /*
7527 * Do all the call setup and cleanup.
7528 */
7529 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS);
7530
7531 /*
7532 * Load the XState::x87 pointer.
7533 */
7534 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, kIemNativeGstRegRef_X87, 0 /*idxRegInClass*/);
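    /* This pointer is passed in IEMNATIVE_CALL_ARG0_GREG and corresponds to the hidden
       argument(s) accounted for by IEM_SSE_AIMPL_HIDDEN_ARGS in the call setup above. */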
7535
7536 /*
7537 * Make the call.
7538 */
7539 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
7540
7541 return off;
7542}
7543
7544
7545#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
7546 off = iemNativeEmitCallSseAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
7547
7548/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
7549DECL_INLINE_THROW(uint32_t)
7550iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
7551{
7552 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
7553 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
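    /* Only the argument slots are asserted here; the argument variables are loaded into
       the actual call registers by the call setup in iemNativeEmitCallSseAImplCommon. */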
7554 return iemNativeEmitCallSseAImplCommon(pReNative, off, pfnAImpl, 2);
7555}
7556
7557
7558#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
7559 off = iemNativeEmitCallSseAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
7560
7561/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
7562DECL_INLINE_THROW(uint32_t)
7563iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
7564{
7565 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
7566 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
7567 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
7568 return iemNativeEmitCallSseAImplCommon(pReNative, off, pfnAImpl, 3);
7569}
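/* Usage sketch (illustrative names, not taken from this file): an instruction's microcode
 * block would invoke these wrappers along the lines of
 *     IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_addps_u128, pSseRes, puSrc);
 * with the hidden XState/x87 pointer argument supplied by iemNativeEmitCallSseAImplCommon
 * and the visible arguments declared as MC arguments in the asserted slots. */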
7570#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
7571
7572
7573/*********************************************************************************************************************************
7574* Include instruction emitters. *
7575*********************************************************************************************************************************/
7576#include "target-x86/IEMAllN8veEmit-x86.h"
7577