VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@103784

Last change on this file since 103784 was 103784, checked in by vboxsync, 9 months ago

VMM/IEM: Implement emitter for IEM_MC_FETCH_YREG_U32(), bugref:10614

1/* $Id: IEMAllN8veRecompiler.cpp 103784 2024-03-11 17:35:04Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef VBOX_STRICT
133static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
134 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
135# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
136static uint32_t iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
137 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz);
138# endif
139static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
140#endif
141#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
142static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
143static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
144#endif
145DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
146DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
147 IEMNATIVEGSTREG enmGstReg, uint32_t off);
148DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
149
150
151/*********************************************************************************************************************************
152* Executable Memory Allocator *
153*********************************************************************************************************************************/
154/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
155 * Use an alternative chunk sub-allocator that does not store internal data
156 * in the chunk.
157 *
158 * Using the RTHeapSimple is not practical on newer darwin systems where
159 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
160 * memory. We would have to change the protection of the whole chunk for
161 * every call to RTHeapSimple, which would be rather expensive.
162 *
163 * This alternative implementation lets us restrict page protection modifications
164 * to the pages backing the executable memory we just allocated.
165 */
166#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
167/** The chunk sub-allocation unit size in bytes. */
168#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
169/** The chunk sub-allocation unit size as a shift factor. */
170#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
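/* Illustrative sketch (hypothetical helper, disabled): how a request size maps onto
 * sub-allocation units with the two defines above.  A 1000 byte request, for instance,
 * rounds up to 8 units = 1024 bytes. */
#if 0
AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE == RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT));
DECLINLINE(uint32_t) iemExecMemExampleReqUnits(uint32_t cbReq) /* hypothetical, for illustration only */
{
    return (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
}
#endif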
171
172#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
173# ifdef IEMNATIVE_USE_GDB_JIT
174# define IEMNATIVE_USE_GDB_JIT_ET_DYN
175
176/** GDB JIT: Code entry. */
177typedef struct GDBJITCODEENTRY
178{
179 struct GDBJITCODEENTRY *pNext;
180 struct GDBJITCODEENTRY *pPrev;
181 uint8_t *pbSymFile;
182 uint64_t cbSymFile;
183} GDBJITCODEENTRY;
184
185/** GDB JIT: Actions. */
186typedef enum GDBJITACTIONS : uint32_t
187{
188 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
189} GDBJITACTIONS;
190
191/** GDB JIT: Descriptor. */
192typedef struct GDBJITDESCRIPTOR
193{
194 uint32_t uVersion;
195 GDBJITACTIONS enmAction;
196 GDBJITCODEENTRY *pRelevant;
197 GDBJITCODEENTRY *pHead;
198 /** Our addition: */
199 GDBJITCODEENTRY *pTail;
200} GDBJITDESCRIPTOR;
201
202/** GDB JIT: Our simple symbol file data. */
203typedef struct GDBJITSYMFILE
204{
205 Elf64_Ehdr EHdr;
206# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Shdr aShdrs[5];
208# else
209 Elf64_Shdr aShdrs[7];
210 Elf64_Phdr aPhdrs[2];
211# endif
212 /** The dwarf ehframe data for the chunk. */
213 uint8_t abEhFrame[512];
214 char szzStrTab[128];
215 Elf64_Sym aSymbols[3];
216# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
217 Elf64_Sym aDynSyms[2];
218 Elf64_Dyn aDyn[6];
219# endif
220} GDBJITSYMFILE;
221
222extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
223extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
224
225/** Init once for g_IemNativeGdbJitLock. */
226static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
227/** The critical section serializing GDB JIT registrations (created via g_IemNativeGdbJitOnce). */
228static RTCRITSECT g_IemNativeGdbJitLock;
229
230/** GDB reads the info here. */
231GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
232
233/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
234DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
235{
236 ASMNopPause();
237}
238
239/** @callback_method_impl{FNRTONCE} */
240static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
241{
242 RT_NOREF(pvUser);
243 return RTCritSectInit(&g_IemNativeGdbJitLock);
244}
245
246
247# endif /* IEMNATIVE_USE_GDB_JIT */
248
249/**
250 * Per-chunk unwind info for non-windows hosts.
251 */
252typedef struct IEMEXECMEMCHUNKEHFRAME
253{
254# ifdef IEMNATIVE_USE_LIBUNWIND
255 /** The offset of the FDA into abEhFrame. */
256 uintptr_t offFda;
257# else
258 /** 'struct object' storage area. */
259 uint8_t abObject[1024];
260# endif
261# ifdef IEMNATIVE_USE_GDB_JIT
262# if 0
263 /** The GDB JIT 'symbol file' data. */
264 GDBJITSYMFILE GdbJitSymFile;
265# endif
266 /** The GDB JIT list entry. */
267 GDBJITCODEENTRY GdbJitEntry;
268# endif
269 /** The dwarf ehframe data for the chunk. */
270 uint8_t abEhFrame[512];
271} IEMEXECMEMCHUNKEHFRAME;
272/** Pointer to per-chunk unwind info for non-windows hosts. */
273typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
274#endif
275
276
277/**
278 * A chunk of executable memory.
279 */
280typedef struct IEMEXECMEMCHUNK
281{
282#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
283 /** Number of free items in this chunk. */
284 uint32_t cFreeUnits;
285 /** Hint where to start searching for free space in the allocation bitmap. */
286 uint32_t idxFreeHint;
287#else
288 /** The heap handle. */
289 RTHEAPSIMPLE hHeap;
290#endif
291 /** Pointer to the chunk. */
292 void *pvChunk;
293#ifdef IN_RING3
294 /**
295 * Pointer to the unwind information.
296 *
297 * This is used during C++ throw and longjmp (windows and probably most other
298 * platforms). Some debuggers (windbg) make use of it as well.
299 *
300 * Windows: This is allocated from hHeap on windows because (at least for
301 * AMD64) the UNWIND_INFO structure address in the
302 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
303 *
304 * Others: Allocated from the regular heap to avoid unnecessary executable data
305 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
306 void *pvUnwindInfo;
307#elif defined(IN_RING0)
308 /** Allocation handle. */
309 RTR0MEMOBJ hMemObj;
310#endif
311} IEMEXECMEMCHUNK;
312/** Pointer to a memory chunk. */
313typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
314
315
316/**
317 * Executable memory allocator for the native recompiler.
318 */
319typedef struct IEMEXECMEMALLOCATOR
320{
321 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
322 uint32_t uMagic;
323
324 /** The chunk size. */
325 uint32_t cbChunk;
326 /** The maximum number of chunks. */
327 uint32_t cMaxChunks;
328 /** The current number of chunks. */
329 uint32_t cChunks;
330 /** Hint where to start looking for available memory. */
331 uint32_t idxChunkHint;
332 /** Statistics: Current number of allocations. */
333 uint32_t cAllocations;
334
335 /** The total amount of memory available. */
336 uint64_t cbTotal;
337 /** Total amount of free memory. */
338 uint64_t cbFree;
339 /** Total amount of memory allocated. */
340 uint64_t cbAllocated;
341
342#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
343 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
344 *
345 * Since the chunk size is a power of two and the minimum chunk size is a lot
346 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
347 * require a whole number of uint64_t elements in the allocation bitmap. So,
348 * for the sake of simplicity/laziness, they are allocated as one contiguous
349 * block. */
350 uint64_t *pbmAlloc;
351 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
352 uint32_t cUnitsPerChunk;
353 /** Number of bitmap elements per chunk (for quickly locating the bitmap
354 * portion corresponding to a chunk). */
355 uint32_t cBitmapElementsPerChunk;
356#else
357 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
358 * @{ */
359 /** The size of the heap internal block header. This is used to adjust the
360 * requested memory size to make sure there is exactly enough room for a header at
361 * the end of the blocks we allocate before the next 64 byte alignment line. */
362 uint32_t cbHeapBlockHdr;
363 /** The size of the initial heap allocation required to make sure the first
364 * allocation is correctly aligned. */
365 uint32_t cbHeapAlignTweak;
366 /** The alignment tweak allocation address. */
367 void *pvAlignTweak;
368 /** @} */
369#endif
370
371#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
372 /** Pointer to the array of unwind info running parallel to aChunks (same
373 * allocation as this structure, located after the bitmaps).
374 * (For Windows, the structures must reside in 32-bit RVA distance to the
375 * actual chunk, so they are allocated off the chunk.) */
376 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
377#endif
378
379 /** The allocation chunks. */
380 RT_FLEXIBLE_ARRAY_EXTENSION
381 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
382} IEMEXECMEMALLOCATOR;
383/** Pointer to an executable memory allocator. */
384typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
385
386/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
387#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
388
389
390static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
391
392
393/**
394 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
395 * the heap statistics.
396 */
397static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
398 uint32_t cbReq, uint32_t idxChunk)
399{
400 pExecMemAllocator->cAllocations += 1;
401 pExecMemAllocator->cbAllocated += cbReq;
402#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
403 pExecMemAllocator->cbFree -= cbReq;
404#else
405 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
406#endif
407 pExecMemAllocator->idxChunkHint = idxChunk;
408
409#ifdef RT_OS_DARWIN
410 /*
411 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
412 * on darwin. So, we mark the pages returned as read+write after alloc and
413 * expect the caller to call iemExecMemAllocatorReadyForUse when done
414 * writing to the allocation.
415 *
416 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
417 * for details.
418 */
419 /** @todo detect if this is necessary... it wasn't required on 10.15 or
420 * whatever older version it was. */
421 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
422 AssertRC(rc);
423#endif
424
425 return pvRet;
426}
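/* Illustrative sketch (disabled; pabEmittedInstrs and cbNeeded are hypothetical names): the
 * alloc / write / ready-for-use sequence implied by the darwin W^X note above. */
#if 0
    uint8_t * const pbDst = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbNeeded);
    if (pbDst)
    {
        memcpy(pbDst, pabEmittedInstrs, cbNeeded);              /* pages are read+write at this point */
        iemExecMemAllocatorReadyForUse(pVCpu, pbDst, cbNeeded); /* flip to read+exec and flush the icache */
    }
#endif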
427
428
429#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
430static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
431 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
432{
433 /*
434 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
435 */
436 Assert(!(cToScan & 63));
437 Assert(!(idxFirst & 63));
438 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
439 pbmAlloc += idxFirst / 64;
440
441 /*
442 * Scan the bitmap for cReqUnits consecutive clear bits.
443 */
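    /* Worked example: with 128 byte units a 1000 byte request needs cReqUnits = 8, so the
       loop below looks for 8 consecutive clear bits; (idxFirst + iBit) << 7 then gives the
       byte offset of the allocation within the chunk. */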
444 /** @todo This can probably be done more efficiently for non-x86 systems. */
445 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
446 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
447 {
448 uint32_t idxAddBit = 1;
449 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
450 idxAddBit++;
451 if (idxAddBit >= cReqUnits)
452 {
453 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
454
455 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
456 pChunk->cFreeUnits -= cReqUnits;
457 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
458
459 void * const pvRet = (uint8_t *)pChunk->pvChunk
460 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
461
462 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
463 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
464 }
465
466 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
467 }
468 return NULL;
469}
470#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
471
472
473static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
474{
475#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
476 /*
477 * Figure out how much to allocate.
478 */
479 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
480 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
481 {
482 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
483 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
484 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
485 {
486 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
487 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
488 if (pvRet)
489 return pvRet;
490 }
491 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
492 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
493 cReqUnits, idxChunk);
494 }
495#else
496 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
497 if (pvRet)
498 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
499#endif
500 return NULL;
501
502}
503
504
505/**
506 * Allocates @a cbReq bytes of executable memory.
507 *
508 * @returns Pointer to the memory, NULL if out of memory or other problem
509 * encountered.
510 * @param pVCpu The cross context virtual CPU structure of the calling
511 * thread.
512 * @param cbReq How many bytes are required.
513 */
514static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
515{
516 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
517 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
518 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
519
520
521 for (unsigned iIteration = 0;; iIteration++)
522 {
523 /*
524 * Adjust the request size so it'll fit the allocator alignment/whatnot.
525 *
526 * For the RTHeapSimple allocator this means to follow the logic described
527 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
528 * existing chunks if we think we've got sufficient free memory around.
529 *
530 * While for the alternative one we just align it up to a whole unit size.
531 */
532#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
533 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
534#else
535 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
536#endif
537 if (cbReq <= pExecMemAllocator->cbFree)
538 {
539 uint32_t const cChunks = pExecMemAllocator->cChunks;
540 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
541 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
542 {
543 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
544 if (pvRet)
545 return pvRet;
546 }
547 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
548 {
549 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
550 if (pvRet)
551 return pvRet;
552 }
553 }
554
555 /*
556 * Can we grow it with another chunk?
557 */
558 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
559 {
560 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
561 AssertLogRelRCReturn(rc, NULL);
562
563 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
564 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
565 if (pvRet)
566 return pvRet;
567 AssertFailed();
568 }
569
570 /*
571 * Try pruning native TBs once.
572 */
573 if (iIteration == 0)
574 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
575 else
576 {
577 /** @todo stats... */
578 return NULL;
579 }
580 }
581
582}
583
584
585/** This is a hook that we may need later for changing memory protection back
586 * to readonly+exec */
587static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
588{
589#ifdef RT_OS_DARWIN
590 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
591 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
592 AssertRC(rc); RT_NOREF(pVCpu);
593
594 /*
595 * Flush the instruction cache:
596 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
597 */
598 /* sys_dcache_flush(pv, cb); - not necessary */
599 sys_icache_invalidate(pv, cb);
600#else
601 RT_NOREF(pVCpu, pv, cb);
602#endif
603}
604
605
606/**
607 * Frees executable memory.
608 */
609void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
610{
611 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
612 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
613 Assert(pv);
614#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
615 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
616#else
617 Assert(!((uintptr_t)pv & 63));
618#endif
619
620 /* Align the size as we did when allocating the block. */
621#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
622 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
623#else
624 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
625#endif
626
627 /* Free it / assert sanity. */
628#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
629 uint32_t const cChunks = pExecMemAllocator->cChunks;
630 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
631 bool fFound = false;
632 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
633 {
634 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
635 fFound = offChunk < cbChunk;
636 if (fFound)
637 {
638#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
639 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
640 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
641
642 /* Check that it's valid and free it. */
643 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
644 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
645 for (uint32_t i = 1; i < cReqUnits; i++)
646 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
647 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
648
649 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
650 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
651
652 /* Update the stats. */
653 pExecMemAllocator->cbAllocated -= cb;
654 pExecMemAllocator->cbFree += cb;
655 pExecMemAllocator->cAllocations -= 1;
656 return;
657#else
658 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
659 break;
660#endif
661 }
662 }
663# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
664 AssertFailed();
665# else
666 Assert(fFound);
667# endif
668#endif
669
670#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
671 /* Update stats while cb is freshly calculated. */
672 pExecMemAllocator->cbAllocated -= cb;
673 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
674 pExecMemAllocator->cAllocations -= 1;
675
676 /* Free it. */
677 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
678#endif
679}
680
681
682
683#ifdef IN_RING3
684# ifdef RT_OS_WINDOWS
685
686/**
687 * Initializes the unwind info structures for windows hosts.
688 */
689static int
690iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
691 void *pvChunk, uint32_t idxChunk)
692{
693 RT_NOREF(pVCpu);
694
695 /*
696 * The AMD64 unwind opcodes.
697 *
698 * This is a program that starts with RSP after a RET instruction that
699 * ends up in recompiled code, and the operations we describe here will
700 * restore all non-volatile registers and bring RSP back to where our
701 * RET address is. This means it's reverse order from what happens in
702 * the prologue.
703 *
704 * Note! Using a frame register approach here, both because we have one
705 * and mainly because the UWOP_ALLOC_LARGE argument values
706 * would be a pain to write initializers for. On the positive
707 * side, we're impervious to changes in the stack variable
708 * area and can deal with dynamic stack allocations if necessary.
709 */
710 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
711 {
712 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 16 (0x60) */
713 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
714 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
715 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
716 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
717 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
718 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
719 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
720 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
721 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
722 };
723 union
724 {
725 IMAGE_UNWIND_INFO Info;
726 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
727 } s_UnwindInfo =
728 {
729 {
730 /* .Version = */ 1,
731 /* .Flags = */ 0,
732 /* .SizeOfProlog = */ 16, /* whatever */
733 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
734 /* .FrameRegister = */ X86_GREG_xBP,
735 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
736 }
737 };
738 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
739 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
740
741 /*
742 * Calc how much space we need and allocate it off the exec heap.
743 */
744 unsigned const cFunctionEntries = 1;
745 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
746 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
747# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
748 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
749 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
750 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
751# else
752 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
753 - pExecMemAllocator->cbHeapBlockHdr;
754 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
755 32 /*cbAlignment*/);
756# endif
757 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
758 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
759
760 /*
761 * Initialize the structures.
762 */
763 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
764
765 paFunctions[0].BeginAddress = 0;
766 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
767 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
768
769 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
770 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
771
772 /*
773 * Register it.
774 */
775 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
776 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
777
778 return VINF_SUCCESS;
779}
780
781
782# else /* !RT_OS_WINDOWS */
783
784/**
785 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
786 */
787DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
788{
789 if (iValue >= 64)
790 {
791 Assert(iValue < 0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
794 }
795 else if (iValue >= 0)
796 *Ptr.pb++ = (uint8_t)iValue;
797 else if (iValue > -64)
798 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
799 else
800 {
801 Assert(iValue > -0x2000);
802 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
803 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
804 }
805 return Ptr;
806}
807
808
809/**
810 * Emits an ULEB128 encoded value (up to 64-bit wide).
811 */
812DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
813{
814 while (uValue >= 0x80)
815 {
816 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
817 uValue >>= 7;
818 }
819 *Ptr.pb++ = (uint8_t)uValue;
820 return Ptr;
821}
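/* Worked examples for the two emitters above:
 *   iemDwarfPutLeb128(Ptr, -8)   emits the single byte 0x78 ((0xf8 & 0x3f) | 0x40);
 *   iemDwarfPutLeb128(Ptr, 100)  emits 0xe4 0x00 (bit 6 of 100 is set, so two bytes are needed);
 *   iemDwarfPutUleb128(Ptr, 300) emits 0xac 0x02 (0x12c: low 7 bits | 0x80, then 0x02). */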
822
823
824/**
825 * Emits a CFA rule as register @a uReg + offset @a off.
826 */
827DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
828{
829 *Ptr.pb++ = DW_CFA_def_cfa;
830 Ptr = iemDwarfPutUleb128(Ptr, uReg);
831 Ptr = iemDwarfPutUleb128(Ptr, off);
832 return Ptr;
833}
834
835
836/**
837 * Emits a register (@a uReg) save location:
838 * CFA + @a off * data_alignment_factor
839 */
840DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
841{
842 if (uReg < 0x40)
843 *Ptr.pb++ = DW_CFA_offset | uReg;
844 else
845 {
846 *Ptr.pb++ = DW_CFA_offset_extended;
847 Ptr = iemDwarfPutUleb128(Ptr, uReg);
848 }
849 Ptr = iemDwarfPutUleb128(Ptr, off);
850 return Ptr;
851}
852
853
854# if 0 /* unused */
855/**
856 * Emits a register (@a uReg) save location, using signed offset:
857 * CFA + @a offSigned * data_alignment_factor
858 */
859DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
860{
861 *Ptr.pb++ = DW_CFA_offset_extended_sf;
862 Ptr = iemDwarfPutUleb128(Ptr, uReg);
863 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
864 return Ptr;
865}
866# endif
867
868
869/**
870 * Initializes the unwind info section for non-windows hosts.
871 */
872static int
873iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
874 void *pvChunk, uint32_t idxChunk)
875{
876 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
877 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
878
879 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
880
881 /*
882 * Generate the CIE first.
883 */
884# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
885 uint8_t const iDwarfVer = 3;
886# else
887 uint8_t const iDwarfVer = 4;
888# endif
889 RTPTRUNION const PtrCie = Ptr;
890 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
891 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
892 *Ptr.pb++ = iDwarfVer; /* DWARF version */
893 *Ptr.pb++ = 0; /* Augmentation. */
894 if (iDwarfVer >= 4)
895 {
896 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
897 *Ptr.pb++ = 0; /* Segment selector size. */
898 }
899# ifdef RT_ARCH_AMD64
900 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
901# else
902 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
903# endif
904 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
905# ifdef RT_ARCH_AMD64
906 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
907# elif defined(RT_ARCH_ARM64)
908 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
909# else
910# error "port me"
911# endif
912 /* Initial instructions: */
913# ifdef RT_ARCH_AMD64
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
922# elif defined(RT_ARCH_ARM64)
923# if 1
924 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
925# else
926 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
927# endif
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
934 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
935 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
936 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
937 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
938 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
939 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
940 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
941 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
942# else
943# error "port me"
944# endif
945 while ((Ptr.u - PtrCie.u) & 3)
946 *Ptr.pb++ = DW_CFA_nop;
947 /* Finalize the CIE size. */
948 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
949
950 /*
951 * Generate an FDE for the whole chunk area.
952 */
953# ifdef IEMNATIVE_USE_LIBUNWIND
954 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
955# endif
956 RTPTRUNION const PtrFde = Ptr;
957 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
958 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
959 Ptr.pu32++;
960 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
961 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
962# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
963 *Ptr.pb++ = DW_CFA_nop;
964# endif
965 while ((Ptr.u - PtrFde.u) & 3)
966 *Ptr.pb++ = DW_CFA_nop;
967 /* Finalize the FDE size. */
968 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
969
970 /* Terminator entry. */
971 *Ptr.pu32++ = 0;
972 *Ptr.pu32++ = 0; /* just to be sure... */
973 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
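    /* At this point abEhFrame holds, in order: the CIE (length, id 0, version, alignment
     * factors, return address column and the initial CFA/register rules), the FDE covering
     * the whole chunk (length, back-link to the CIE, start PC, PC range) and a zero
     * terminator entry. */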
974
975 /*
976 * Register it.
977 */
978# ifdef IEMNATIVE_USE_LIBUNWIND
979 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
980# else
981 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
982 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
983# endif
984
985# ifdef IEMNATIVE_USE_GDB_JIT
986 /*
987 * Now for telling GDB about this (experimental).
988 *
989 * This seems to work best with ET_DYN.
990 */
991 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
992# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
993 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
994 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
995# else
996 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
997 - pExecMemAllocator->cbHeapBlockHdr;
998 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
999# endif
1000 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
1001 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
1002
1003 RT_ZERO(*pSymFile);
1004
1005 /*
1006 * The ELF header:
1007 */
1008 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1009 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1010 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1011 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1012 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1013 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1014 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1015 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1016# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1017 pSymFile->EHdr.e_type = ET_DYN;
1018# else
1019 pSymFile->EHdr.e_type = ET_REL;
1020# endif
1021# ifdef RT_ARCH_AMD64
1022 pSymFile->EHdr.e_machine = EM_AMD64;
1023# elif defined(RT_ARCH_ARM64)
1024 pSymFile->EHdr.e_machine = EM_AARCH64;
1025# else
1026# error "port me"
1027# endif
1028 pSymFile->EHdr.e_version = 1; /*?*/
1029 pSymFile->EHdr.e_entry = 0;
1030# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1031 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1032# else
1033 pSymFile->EHdr.e_phoff = 0;
1034# endif
1035 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1036 pSymFile->EHdr.e_flags = 0;
1037 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1038# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1039 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1040 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1041# else
1042 pSymFile->EHdr.e_phentsize = 0;
1043 pSymFile->EHdr.e_phnum = 0;
1044# endif
1045 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1046 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1047 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1048
1049 uint32_t offStrTab = 0;
1050#define APPEND_STR(a_szStr) do { \
1051 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1052 offStrTab += sizeof(a_szStr); \
1053 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1054 } while (0)
1055#define APPEND_STR_FMT(a_szStr, ...) do { \
1056 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1057 offStrTab++; \
1058 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1059 } while (0)
1060
1061 /*
1062 * Section headers.
1063 */
1064 /* Section header #0: NULL */
1065 unsigned i = 0;
1066 APPEND_STR("");
1067 RT_ZERO(pSymFile->aShdrs[i]);
1068 i++;
1069
1070 /* Section header: .eh_frame */
1071 pSymFile->aShdrs[i].sh_name = offStrTab;
1072 APPEND_STR(".eh_frame");
1073 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1074 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1075# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1076 pSymFile->aShdrs[i].sh_offset
1077 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1078# else
1079 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1080 pSymFile->aShdrs[i].sh_offset = 0;
1081# endif
1082
1083 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1084 pSymFile->aShdrs[i].sh_link = 0;
1085 pSymFile->aShdrs[i].sh_info = 0;
1086 pSymFile->aShdrs[i].sh_addralign = 1;
1087 pSymFile->aShdrs[i].sh_entsize = 0;
1088 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1089 i++;
1090
1091 /* Section header: .shstrtab */
1092 unsigned const iShStrTab = i;
1093 pSymFile->EHdr.e_shstrndx = iShStrTab;
1094 pSymFile->aShdrs[i].sh_name = offStrTab;
1095 APPEND_STR(".shstrtab");
1096 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1097 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1098# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1099 pSymFile->aShdrs[i].sh_offset
1100 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1101# else
1102 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1103 pSymFile->aShdrs[i].sh_offset = 0;
1104# endif
1105 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1106 pSymFile->aShdrs[i].sh_link = 0;
1107 pSymFile->aShdrs[i].sh_info = 0;
1108 pSymFile->aShdrs[i].sh_addralign = 1;
1109 pSymFile->aShdrs[i].sh_entsize = 0;
1110 i++;
1111
1112 /* Section header: .symtab */
1113 pSymFile->aShdrs[i].sh_name = offStrTab;
1114 APPEND_STR(".symtab");
1115 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1116 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1117 pSymFile->aShdrs[i].sh_offset
1118 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1119 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1120 pSymFile->aShdrs[i].sh_link = iShStrTab;
1121 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1122 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1123 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1124 i++;
1125
1126# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1127 /* Section header: .dynsym */
1128 pSymFile->aShdrs[i].sh_name = offStrTab;
1129 APPEND_STR(".dynsym");
1130 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1131 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1132 pSymFile->aShdrs[i].sh_offset
1133 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1134 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1135 pSymFile->aShdrs[i].sh_link = iShStrTab;
1136 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1137 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1138 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1139 i++;
1140# endif
1141
1142# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1143 /* Section header: .dynamic */
1144 pSymFile->aShdrs[i].sh_name = offStrTab;
1145 APPEND_STR(".dynamic");
1146 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1147 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1148 pSymFile->aShdrs[i].sh_offset
1149 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1150 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1151 pSymFile->aShdrs[i].sh_link = iShStrTab;
1152 pSymFile->aShdrs[i].sh_info = 0;
1153 pSymFile->aShdrs[i].sh_addralign = 1;
1154 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1155 i++;
1156# endif
1157
1158 /* Section header: .text */
1159 unsigned const iShText = i;
1160 pSymFile->aShdrs[i].sh_name = offStrTab;
1161 APPEND_STR(".text");
1162 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1163 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1164# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1165 pSymFile->aShdrs[i].sh_offset
1166 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1167# else
1168 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1169 pSymFile->aShdrs[i].sh_offset = 0;
1170# endif
1171 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1172 pSymFile->aShdrs[i].sh_link = 0;
1173 pSymFile->aShdrs[i].sh_info = 0;
1174 pSymFile->aShdrs[i].sh_addralign = 1;
1175 pSymFile->aShdrs[i].sh_entsize = 0;
1176 i++;
1177
1178 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
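    /* Section table recap: [0] NULL, [1] .eh_frame, [2] .shstrtab, [3] .symtab, then for
     * ET_DYN builds .dynsym and .dynamic, and finally .text covering the remainder of the
     * chunk after this GDBJITSYMFILE header. */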
1179
1180# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1181 /*
1182 * The program headers:
1183 */
1184 /* Everything in a single LOAD segment: */
1185 i = 0;
1186 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1187 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = 0;
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1193 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1194 i++;
1195 /* The .dynamic segment. */
1196 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1197 pSymFile->aPhdrs[i].p_flags = PF_R;
1198 pSymFile->aPhdrs[i].p_offset
1199 = pSymFile->aPhdrs[i].p_vaddr
1200 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1201 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1202 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1203 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1204 i++;
1205
1206 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1207
1208 /*
1209 * The dynamic section:
1210 */
1211 i = 0;
1212 pSymFile->aDyn[i].d_tag = DT_SONAME;
1213 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1214 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1215 i++;
1216 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1217 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1218 i++;
1219 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1220 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1221 i++;
1222 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1223 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1224 i++;
1225 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1226 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1227 i++;
1228 pSymFile->aDyn[i].d_tag = DT_NULL;
1229 i++;
1230 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1231# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1232
1233 /*
1234 * Symbol tables:
1235 */
1236 /** @todo gdb doesn't seem to really like this ... */
1237 i = 0;
1238 pSymFile->aSymbols[i].st_name = 0;
1239 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1240 pSymFile->aSymbols[i].st_value = 0;
1241 pSymFile->aSymbols[i].st_size = 0;
1242 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1243 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1244# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1245 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1246# endif
1247 i++;
1248
1249 pSymFile->aSymbols[i].st_name = 0;
1250 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1251 pSymFile->aSymbols[i].st_value = 0;
1252 pSymFile->aSymbols[i].st_size = 0;
1253 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1254 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1255 i++;
1256
1257 pSymFile->aSymbols[i].st_name = offStrTab;
1258 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1259# if 0
1260 pSymFile->aSymbols[i].st_shndx = iShText;
1261 pSymFile->aSymbols[i].st_value = 0;
1262# else
1263 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1264 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1265# endif
1266 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1267 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1268 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1269# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1270 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1271 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1272# endif
1273 i++;
1274
1275 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1276 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1277
1278 /*
1279 * The GDB JIT entry and informing GDB.
1280 */
1281 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1282# if 1
1283 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1284# else
1285 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1286# endif
1287
1288 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1289 RTCritSectEnter(&g_IemNativeGdbJitLock);
1290 pEhFrame->GdbJitEntry.pNext = NULL;
1291 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1292 if (__jit_debug_descriptor.pTail)
1293 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1294 else
1295 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1296 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1297 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1298
1299 /* Notify GDB: */
1300 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1301 __jit_debug_register_code();
1302 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1303 RTCritSectLeave(&g_IemNativeGdbJitLock);
1304
1305# else /* !IEMNATIVE_USE_GDB_JIT */
1306 RT_NOREF(pVCpu);
1307# endif /* !IEMNATIVE_USE_GDB_JIT */
1308
1309 return VINF_SUCCESS;
1310}
1311
1312# endif /* !RT_OS_WINDOWS */
1313#endif /* IN_RING3 */
1314
1315
1316/**
1317 * Adds another chunk to the executable memory allocator.
1318 *
1319 * This is used by the init code for the initial allocation and later by the
1320 * regular allocator function when it's out of memory.
1321 */
1322static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1323{
1324 /* Check that we've room for growth. */
1325 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1326 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1327
1328 /* Allocate a chunk. */
1329#ifdef RT_OS_DARWIN
1330 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1331#else
1332 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1333#endif
1334 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1335
1336#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1337 int rc = VINF_SUCCESS;
1338#else
1339 /* Initialize the heap for the chunk. */
1340 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1341 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1342 AssertRC(rc);
1343 if (RT_SUCCESS(rc))
1344 {
1345 /*
1346 * We want the memory to be aligned on 64 byte, so the first time thru
1347 * here we do some exploratory allocations to see how we can achieve this.
1348 * On subsequent runs we only make an initial adjustment allocation, if
1349 * necessary.
1350 *
1351 * Since we own the heap implementation, we know that the internal block
1352 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1353 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1354 * to the size, align up by 64 bytes, and subtract 32 bytes.
1355 *
1356 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1357 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1358 * allocation to force subsequent allocations to return 64 byte aligned
1359 * user areas.
1360 */
1361 if (!pExecMemAllocator->cbHeapBlockHdr)
1362 {
1363 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1364 pExecMemAllocator->cbHeapAlignTweak = 64;
1365 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1366 32 /*cbAlignment*/);
1367 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1368
1369 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1370 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1371 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1372 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1373 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1374
1375 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1376 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1377 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1378 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1379 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1380
1381 RTHeapSimpleFree(hHeap, pvTest2);
1382 RTHeapSimpleFree(hHeap, pvTest1);
1383 }
1384 else
1385 {
1386 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1387 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1388 }
1389 if (RT_SUCCESS(rc))
1390#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1391 {
1392 /*
1393 * Add the chunk.
1394 *
1395 * This must be done before the unwind init so windows can allocate
1396 * memory from the chunk when using the alternative sub-allocator.
1397 */
1398 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1399#ifdef IN_RING3
1400 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1401#endif
1402#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1403 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1404#else
1405 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1406 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1407 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1408 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1409#endif
1410
1411 pExecMemAllocator->cChunks = idxChunk + 1;
1412 pExecMemAllocator->idxChunkHint = idxChunk;
1413
1414#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1415 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1416 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1417#else
1418 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1419 pExecMemAllocator->cbTotal += cbFree;
1420 pExecMemAllocator->cbFree += cbFree;
1421#endif
1422
1423#ifdef IN_RING3
1424 /*
1425 * Initialize the unwind information (this cannot really fail atm).
1426 * (This sets pvUnwindInfo.)
1427 */
1428 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1429 if (RT_SUCCESS(rc))
1430#endif
1431 {
1432 return VINF_SUCCESS;
1433 }
1434
1435#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1436 /* Just in case the impossible happens, undo the above: */
1437 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1438 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1439 pExecMemAllocator->cChunks = idxChunk;
1440 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1441 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1442 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1443 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1444#endif
1445 }
1446#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1447 }
1448#endif
1449 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1450 RT_NOREF(pVCpu);
1451 return rc;
1452}
1453
1454
1455/**
1456 * Initializes the executable memory allocator for native recompilation on the
1457 * calling EMT.
1458 *
1459 * @returns VBox status code.
1460 * @param pVCpu The cross context virtual CPU structure of the calling
1461 * thread.
1462 * @param cbMax The max size of the allocator.
1463 * @param cbInitial The initial allocator size.
1464 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1465 * dependent).
1466 */
1467int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1468{
1469 /*
1470 * Validate input.
1471 */
1472 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1473 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1474 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1475 || cbChunk == 0
1476 || ( RT_IS_POWER_OF_TWO(cbChunk)
1477 && cbChunk >= _1M
1478 && cbChunk <= _256M
1479 && cbChunk <= cbMax),
1480 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1481 VERR_OUT_OF_RANGE);
1482
1483 /*
1484 * Adjust/figure out the chunk size.
1485 */
1486 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1487 {
1488 if (cbMax >= _256M)
1489 cbChunk = _64M;
1490 else
1491 {
1492 if (cbMax < _16M)
1493 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1494 else
1495 cbChunk = (uint32_t)cbMax / 4;
1496 if (!RT_IS_POWER_OF_TWO(cbChunk))
1497 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1498 }
1499 }
1500
1501 if (cbChunk > cbMax)
1502 cbMax = cbChunk;
1503 else
1504 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1505 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1506 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
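    /* Worked example (illustrative numbers only): for cbMax=40M and cbChunk=0 the
       middle branch above yields cbChunk = 40M / 4 = 10M, which isn't a power of
       two and is rounded up via RT_BIT_32(ASMBitLastSetU32()) to 16M.  cbMax is
       then rounded up to a whole number of chunks:
            cbMax = (40M - 1 + 16M) / 16M * 16M = 48M,   cMaxChunks = 3. */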
1507
1508 /*
1509    * Allocate and initialize the allocator instance.
1510 */
1511 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1512#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1513 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1514 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1515 cbNeeded += cbBitmap * cMaxChunks;
1516 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1517 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1518#endif
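    /* Sizing sketch for the alternative sub-allocator bitmap (illustrative; the
       real IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT value may differ): with a 64M chunk
       and a 256 byte allocation unit (shift of 8):
            units per chunk  = 64M >> 8       = 256K
            bitmap bytes     = 64M >> (8 + 3) =  32K   (one bit per unit)
            bitmap elements  = 64M >> (8 + 6) =   4K   uint64_t words
       i.e. all three figures describe the same 256K-bit bitmap. */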
1519#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1520 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1521 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1522#endif
1523 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1524 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1525 VERR_NO_MEMORY);
1526 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1527 pExecMemAllocator->cbChunk = cbChunk;
1528 pExecMemAllocator->cMaxChunks = cMaxChunks;
1529 pExecMemAllocator->cChunks = 0;
1530 pExecMemAllocator->idxChunkHint = 0;
1531 pExecMemAllocator->cAllocations = 0;
1532 pExecMemAllocator->cbTotal = 0;
1533 pExecMemAllocator->cbFree = 0;
1534 pExecMemAllocator->cbAllocated = 0;
1535#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1536 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1537 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1538 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1539 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1540#endif
1541#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1542 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1543#endif
1544 for (uint32_t i = 0; i < cMaxChunks; i++)
1545 {
1546#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1547 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1548 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1549#else
1550 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1551#endif
1552 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1553#ifdef IN_RING0
1554 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1555#else
1556 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1557#endif
1558 }
1559 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1560
1561 /*
1562 * Do the initial allocations.
1563 */
1564    while (cbInitial >= (uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk)
1565 {
1566 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1567 AssertLogRelRCReturn(rc, rc);
1568 }
1569
1570 pExecMemAllocator->idxChunkHint = 0;
1571
1572 return VINF_SUCCESS;
1573}
1574
1575
1576/*********************************************************************************************************************************
1577* Native Recompilation *
1578*********************************************************************************************************************************/
1579
1580
1581/**
1582 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1585{
1586 pVCpu->iem.s.cInstructions += idxInstr;
1587 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1588}
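/* Note on the helper above: idxInstr is the index of the instruction that caused
   the exit, so adding it credits the guest instructions this TB managed to execute
   before bailing out, and VINF_IEM_REEXEC_BREAK is folded into VINF_SUCCESS since
   it is purely an internal "stop executing this TB" signal. */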
1589
1590
1591/**
1592 * Used by TB code when it wants to raise a \#GP(0).
1593 */
1594IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1595{
1596 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1597#ifndef _MSC_VER
1598 return VINF_IEM_RAISED_XCPT; /* not reached */
1599#endif
1600}
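/* Note on the raise-exception helpers in this group: the iemRaise*Jmp workers do
   not return (they longjmp back to the executor), so the VINF_IEM_RAISED_XCPT
   return statement is unreachable and only exists to silence non-MSVC compilers
   complaining about a missing return value. */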
1601
1602
1603/**
1604 * Used by TB code when it wants to raise a \#NM.
1605 */
1606IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1607{
1608 iemRaiseDeviceNotAvailableJmp(pVCpu);
1609#ifndef _MSC_VER
1610 return VINF_IEM_RAISED_XCPT; /* not reached */
1611#endif
1612}
1613
1614
1615/**
1616 * Used by TB code when it wants to raise a \#UD.
1617 */
1618IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1619{
1620 iemRaiseUndefinedOpcodeJmp(pVCpu);
1621#ifndef _MSC_VER
1622 return VINF_IEM_RAISED_XCPT; /* not reached */
1623#endif
1624}
1625
1626
1627/**
1628 * Used by TB code when it wants to raise a \#MF.
1629 */
1630IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1631{
1632 iemRaiseMathFaultJmp(pVCpu);
1633#ifndef _MSC_VER
1634 return VINF_IEM_RAISED_XCPT; /* not reached */
1635#endif
1636}
1637
1638
1639/**
1640 * Used by TB code when it wants to raise a \#XF.
1641 */
1642IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1643{
1644 iemRaiseSimdFpExceptionJmp(pVCpu);
1645#ifndef _MSC_VER
1646 return VINF_IEM_RAISED_XCPT; /* not reached */
1647#endif
1648}
1649
1650
1651/**
1652 * Used by TB code when detecting opcode changes.
1653 * @see iemThreadeFuncWorkerObsoleteTb
1654 */
1655IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1656{
1657    /* We set fSafeToFree to false because we're being called in the context
1658       of a TB callback function, which for native TBs means we cannot release
1659       the executable memory until we've returned all the way back to iemTbExec,
1660       as that return path goes via the native code generated for the TB. */
1661 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1662 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1663 return VINF_IEM_REEXEC_BREAK;
1664}
1665
1666
1667/**
1668 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1669 */
1670IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1671{
1672 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1673 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1674 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1675 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1676 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1677 return VINF_IEM_REEXEC_BREAK;
1678}
1679
1680
1681/**
1682 * Used by TB code when we missed a PC check after a branch.
1683 */
1684IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1685{
1686 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1687 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1688 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1689 pVCpu->iem.s.pbInstrBuf));
1690 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1691 return VINF_IEM_REEXEC_BREAK;
1692}
1693
1694
1695
1696/*********************************************************************************************************************************
1697* Helpers: Segmented memory fetches and stores. *
1698*********************************************************************************************************************************/
1699
1700/**
1701 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1702 */
1703IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1704{
1705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1706 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1707#else
1708 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1709#endif
1710}
1711
1712
1713/**
1714 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1715 * to 16 bits.
1716 */
1717IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1718{
1719#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1720 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1721#else
1722 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1723#endif
1724}
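/* Illustration of the cast chains used by the sign-extending fetch helpers: for a
   fetched byte 0x80 (-128) the U8_Sx_U16 variant evaluates as
        (int8_t)0x80      -> -128
        (int16_t)-128     -> 0xff80              (sign extension to 16 bits)
        (uint16_t)0xff80  -> 0xff80
        (uint64_t)0xff80  -> 0x000000000000ff80  (zero extension to 64 bits)
   i.e. the value is sign extended to the target width only and then zero extended
   into the full 64-bit return register. */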
1725
1726
1727/**
1728 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1729 * to 32 bits.
1730 */
1731IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1732{
1733#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1734 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1735#else
1736 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1737#endif
1738}
1739
1740/**
1741 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1742 * to 64 bits.
1743 */
1744IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1745{
1746#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1747 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1748#else
1749 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1750#endif
1751}
1752
1753
1754/**
1755 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1756 */
1757IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1758{
1759#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1760 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1761#else
1762 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1763#endif
1764}
1765
1766
1767/**
1768 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1769 * to 32 bits.
1770 */
1771IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1772{
1773#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1774 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1775#else
1776 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1777#endif
1778}
1779
1780
1781/**
1782 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1783 * to 64 bits.
1784 */
1785IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1786{
1787#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1788 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1789#else
1790 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1791#endif
1792}
1793
1794
1795/**
1796 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1797 */
1798IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1799{
1800#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1801 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1802#else
1803 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1804#endif
1805}
1806
1807
1808/**
1809 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1810 * to 64 bits.
1811 */
1812IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1813{
1814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1815 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1816#else
1817 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1818#endif
1819}
1820
1821
1822/**
1823 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1824 */
1825IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1826{
1827#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1828 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1829#else
1830 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1831#endif
1832}
1833
1834
1835/**
1836 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1837 */
1838IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1839{
1840#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1841 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1842#else
1843 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1844#endif
1845}
1846
1847
1848/**
1849 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1850 */
1851IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1852{
1853#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1854 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1855#else
1856 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1857#endif
1858}
1859
1860
1861/**
1862 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1863 */
1864IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1865{
1866#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1867 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1868#else
1869 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1870#endif
1871}
1872
1873
1874/**
1875 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1876 */
1877IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1878{
1879#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1880 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1881#else
1882 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1883#endif
1884}
1885
1886
1887
1888/**
1889 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1890 */
1891IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1892{
1893#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1894 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1895#else
1896 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1897#endif
1898}
1899
1900
1901/**
1902 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1903 */
1904IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1905{
1906#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1907 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1908#else
1909 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1910#endif
1911}
1912
1913
1914/**
1915 * Used by TB code to store a 32-bit selector value onto a generic stack.
1916 *
1917 * Intel CPUs don't write a whole dword here, thus the special function.
1918 */
1919IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1920{
1921#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1922 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1923#else
1924 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1925#endif
1926}
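/* Background for the SReg store helpers (summary of documented CPU behaviour, not
   taken from this file): when pushing a segment register with a 32-bit operand
   size, Intel CPUs perform a 16-bit write and leave the upper half of the stack
   dword untouched, hence the dedicated helper instead of the plain 32-bit store. */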
1927
1928
1929/**
1930 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
1931 */
1932IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1933{
1934#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1935 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1936#else
1937 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1938#endif
1939}
1940
1941
1942/**
1943 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1944 */
1945IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1946{
1947#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1948 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1949#else
1950 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1951#endif
1952}
1953
1954
1955/**
1956 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1957 */
1958IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1959{
1960#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1961 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1962#else
1963 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1964#endif
1965}
1966
1967
1968/**
1969 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1970 */
1971IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1972{
1973#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1974 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1975#else
1976 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1977#endif
1978}
1979
1980
1981
1982/*********************************************************************************************************************************
1983* Helpers: Flat memory fetches and stores. *
1984*********************************************************************************************************************************/
1985
1986/**
1987 * Used by TB code to load unsigned 8-bit data w/ flat address.
1988 * @note Zero extending the value to 64-bit to simplify assembly.
1989 */
1990IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1991{
1992#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1993 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1994#else
1995 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1996#endif
1997}
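/* Note on this section: when IEMNATIVE_WITH_TLB_LOOKUP_FETCH is defined, the flat
   helpers reuse the segmented SafeJmp workers with UINT8_MAX as the segment
   register index, which the common memory code treats as "no segment" (flat
   addressing).  The TB itself carries an inline TLB lookup in that configuration,
   so these helpers are presumably only reached on a TLB miss. */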
1998
1999
2000/**
2001 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2002 * to 16 bits.
2003 * @note Zero extending the value to 64-bit to simplify assembly.
2004 */
2005IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2006{
2007#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2008 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2009#else
2010 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2011#endif
2012}
2013
2014
2015/**
2016 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2017 * to 32 bits.
2018 * @note Zero extending the value to 64-bit to simplify assembly.
2019 */
2020IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2021{
2022#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2023 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2024#else
2025 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2026#endif
2027}
2028
2029
2030/**
2031 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2032 * to 64 bits.
2033 */
2034IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2035{
2036#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2037 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2038#else
2039 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2040#endif
2041}
2042
2043
2044/**
2045 * Used by TB code to load unsigned 16-bit data w/ flat address.
2046 * @note Zero extending the value to 64-bit to simplify assembly.
2047 */
2048IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2049{
2050#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2051 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2052#else
2053 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2054#endif
2055}
2056
2057
2058/**
2059 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2060 * to 32 bits.
2061 * @note Zero extending the value to 64-bit to simplify assembly.
2062 */
2063IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2064{
2065#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2066 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2067#else
2068 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2069#endif
2070}
2071
2072
2073/**
2074 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2075 * to 64 bits.
2076 * @note Zero extending the value to 64-bit to simplify assembly.
2077 */
2078IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2079{
2080#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2081 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2082#else
2083 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2084#endif
2085}
2086
2087
2088/**
2089 * Used by TB code to load unsigned 32-bit data w/ flat address.
2090 * @note Zero extending the value to 64-bit to simplify assembly.
2091 */
2092IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2093{
2094#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2095 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2096#else
2097 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2098#endif
2099}
2100
2101
2102/**
2103 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2104 * to 64 bits.
2105 * @note Zero extending the value to 64-bit to simplify assembly.
2106 */
2107IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2108{
2109#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2110 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2111#else
2112 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2113#endif
2114}
2115
2116
2117/**
2118 * Used by TB code to load unsigned 64-bit data w/ flat address.
2119 */
2120IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2121{
2122#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2123 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2124#else
2125 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2126#endif
2127}
2128
2129
2130/**
2131 * Used by TB code to store unsigned 8-bit data w/ flat address.
2132 */
2133IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2134{
2135#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2136 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2137#else
2138 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2139#endif
2140}
2141
2142
2143/**
2144 * Used by TB code to store unsigned 16-bit data w/ flat address.
2145 */
2146IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2147{
2148#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2149 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2150#else
2151 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2152#endif
2153}
2154
2155
2156/**
2157 * Used by TB code to store unsigned 32-bit data w/ flat address.
2158 */
2159IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2160{
2161#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2162 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2163#else
2164 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2165#endif
2166}
2167
2168
2169/**
2170 * Used by TB code to store unsigned 64-bit data w/ flat address.
2171 */
2172IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2173{
2174#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2175 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2176#else
2177 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2178#endif
2179}
2180
2181
2182
2183/**
2184 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2185 */
2186IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2187{
2188#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2189 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2190#else
2191 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2192#endif
2193}
2194
2195
2196/**
2197 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2198 */
2199IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2200{
2201#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2202 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2203#else
2204 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2205#endif
2206}
2207
2208
2209/**
2210 * Used by TB code to store a segment selector value onto a flat stack.
2211 *
2212 * Intel CPUs don't write a whole dword here, thus the special function.
2213 */
2214IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2215{
2216#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2217 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2218#else
2219 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2220#endif
2221}
2222
2223
2224/**
2225 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2226 */
2227IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2228{
2229#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2230 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2231#else
2232 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2233#endif
2234}
2235
2236
2237/**
2238 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2239 */
2240IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2241{
2242#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2243 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2244#else
2245 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2246#endif
2247}
2248
2249
2250/**
2251 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2252 */
2253IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2254{
2255#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2256 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2257#else
2258 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2259#endif
2260}
2261
2262
2263/**
2264 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2265 */
2266IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2267{
2268#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2269 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2270#else
2271 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2272#endif
2273}
2274
2275
2276
2277/*********************************************************************************************************************************
2278* Helpers: Segmented memory mapping. *
2279*********************************************************************************************************************************/
2280
2281/**
2282 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2283 * segmentation.
2284 */
2285IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2286 RTGCPTR GCPtrMem, uint8_t iSegReg))
2287{
2288#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2289 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2290#else
2291 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2292#endif
2293}
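/* Note on the map helpers in this and the following sections: they return a host
   pointer to the guest data and fill in *pbUnmapInfo with a small token which the
   TB code must later pass to one of the commit-and-unmap helpers further down to
   flush (or discard) the access. */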
2294
2295
2296/**
2297 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2298 */
2299IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2300 RTGCPTR GCPtrMem, uint8_t iSegReg))
2301{
2302#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2303 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2304#else
2305 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2306#endif
2307}
2308
2309
2310/**
2311 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2312 */
2313IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2314 RTGCPTR GCPtrMem, uint8_t iSegReg))
2315{
2316#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2317 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2318#else
2319 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2320#endif
2321}
2322
2323
2324/**
2325 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2326 */
2327IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2328 RTGCPTR GCPtrMem, uint8_t iSegReg))
2329{
2330#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2331 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2332#else
2333 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2334#endif
2335}
2336
2337
2338/**
2339 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2340 * segmentation.
2341 */
2342IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2343 RTGCPTR GCPtrMem, uint8_t iSegReg))
2344{
2345#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2346 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2347#else
2348 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2349#endif
2350}
2351
2352
2353/**
2354 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2355 */
2356IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2357 RTGCPTR GCPtrMem, uint8_t iSegReg))
2358{
2359#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2360 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2361#else
2362 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2363#endif
2364}
2365
2366
2367/**
2368 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2369 */
2370IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2371 RTGCPTR GCPtrMem, uint8_t iSegReg))
2372{
2373#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2374 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2375#else
2376 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2377#endif
2378}
2379
2380
2381/**
2382 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2383 */
2384IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2385 RTGCPTR GCPtrMem, uint8_t iSegReg))
2386{
2387#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2388 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2389#else
2390 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2391#endif
2392}
2393
2394
2395/**
2396 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2397 * segmentation.
2398 */
2399IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2400 RTGCPTR GCPtrMem, uint8_t iSegReg))
2401{
2402#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2403 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2404#else
2405 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2406#endif
2407}
2408
2409
2410/**
2411 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2412 */
2413IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2414 RTGCPTR GCPtrMem, uint8_t iSegReg))
2415{
2416#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2417 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2418#else
2419 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2420#endif
2421}
2422
2423
2424/**
2425 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2426 */
2427IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2428 RTGCPTR GCPtrMem, uint8_t iSegReg))
2429{
2430#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2431 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2432#else
2433 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2434#endif
2435}
2436
2437
2438/**
2439 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2440 */
2441IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2442 RTGCPTR GCPtrMem, uint8_t iSegReg))
2443{
2444#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2445 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2446#else
2447 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2448#endif
2449}
2450
2451
2452/**
2453 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2454 * segmentation.
2455 */
2456IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2457 RTGCPTR GCPtrMem, uint8_t iSegReg))
2458{
2459#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2460 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2461#else
2462 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2463#endif
2464}
2465
2466
2467/**
2468 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2469 */
2470IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2471 RTGCPTR GCPtrMem, uint8_t iSegReg))
2472{
2473#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2474 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2475#else
2476 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2477#endif
2478}
2479
2480
2481/**
2482 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2483 */
2484IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2485 RTGCPTR GCPtrMem, uint8_t iSegReg))
2486{
2487#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2488 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2489#else
2490 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2491#endif
2492}
2493
2494
2495/**
2496 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2497 */
2498IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2499 RTGCPTR GCPtrMem, uint8_t iSegReg))
2500{
2501#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2502 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2503#else
2504 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2505#endif
2506}
2507
2508
2509/**
2510 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2511 */
2512IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2513 RTGCPTR GCPtrMem, uint8_t iSegReg))
2514{
2515#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2516 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2517#else
2518 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2519#endif
2520}
2521
2522
2523/**
2524 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2525 */
2526IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2527 RTGCPTR GCPtrMem, uint8_t iSegReg))
2528{
2529#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2530 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2531#else
2532 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2533#endif
2534}
2535
2536
2537/**
2538 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2539 * segmentation.
2540 */
2541IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2542 RTGCPTR GCPtrMem, uint8_t iSegReg))
2543{
2544#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2545 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2546#else
2547 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2548#endif
2549}
2550
2551
2552/**
2553 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2554 */
2555IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2556 RTGCPTR GCPtrMem, uint8_t iSegReg))
2557{
2558#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2559 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2560#else
2561 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2562#endif
2563}
2564
2565
2566/**
2567 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2568 */
2569IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2570 RTGCPTR GCPtrMem, uint8_t iSegReg))
2571{
2572#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2573 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2574#else
2575 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2576#endif
2577}
2578
2579
2580/**
2581 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2582 */
2583IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2584 RTGCPTR GCPtrMem, uint8_t iSegReg))
2585{
2586#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2587 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2588#else
2589 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2590#endif
2591}
2592
2593
2594/*********************************************************************************************************************************
2595* Helpers: Flat memory mapping. *
2596*********************************************************************************************************************************/
2597
2598/**
2599 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2600 * address.
2601 */
2602IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2603{
2604#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2605 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2606#else
2607 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2608#endif
2609}
2610
2611
2612/**
2613 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2614 */
2615IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2616{
2617#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2618 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2619#else
2620 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2621#endif
2622}
2623
2624
2625/**
2626 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2627 */
2628IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2629{
2630#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2631 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2632#else
2633 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2634#endif
2635}
2636
2637
2638/**
2639 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2640 */
2641IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2642{
2643#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2644 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2645#else
2646 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2647#endif
2648}
2649
2650
2651/**
2652 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2653 * address.
2654 */
2655IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2656{
2657#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2658 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2659#else
2660 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2661#endif
2662}
2663
2664
2665/**
2666 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2667 */
2668IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2669{
2670#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2671 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2672#else
2673 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2674#endif
2675}
2676
2677
2678/**
2679 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2680 */
2681IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2682{
2683#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2684 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2685#else
2686 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2687#endif
2688}
2689
2690
2691/**
2692 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2693 */
2694IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2695{
2696#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2697 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2698#else
2699 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2700#endif
2701}
2702
2703
2704/**
2705 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2706 * address.
2707 */
2708IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2709{
2710#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2711 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2712#else
2713 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2714#endif
2715}
2716
2717
2718/**
2719 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2720 */
2721IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2722{
2723#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2724 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2725#else
2726 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2727#endif
2728}
2729
2730
2731/**
2732 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2733 */
2734IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2735{
2736#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2737 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2738#else
2739 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2740#endif
2741}
2742
2743
2744/**
2745 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2746 */
2747IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2748{
2749#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2750 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2751#else
2752 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2753#endif
2754}
2755
2756
2757/**
2758 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2759 * address.
2760 */
2761IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2762{
2763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2764 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2765#else
2766 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2767#endif
2768}
2769
2770
2771/**
2772 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2773 */
2774IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2775{
2776#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2777 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2778#else
2779 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2780#endif
2781}
2782
2783
2784/**
2785 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2786 */
2787IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2788{
2789#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2790 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2791#else
2792 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2793#endif
2794}
2795
2796
2797/**
2798 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2799 */
2800IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2801{
2802#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2803 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2804#else
2805 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2806#endif
2807}
2808
2809
2810/**
2811 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2812 */
2813IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2814{
2815#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2816 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2817#else
2818 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2819#endif
2820}
2821
2822
2823/**
2824 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2825 */
2826IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2827{
2828#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2829 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2830#else
2831 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2832#endif
2833}
2834
2835
2836/**
2837 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2838 * address.
2839 */
2840IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2841{
2842#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2843 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2844#else
2845 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2846#endif
2847}
2848
2849
2850/**
2851 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2852 */
2853IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2854{
2855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2856 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2857#else
2858 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2859#endif
2860}
2861
2862
2863/**
2864 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2865 */
2866IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2867{
2868#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2869 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2870#else
2871 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2872#endif
2873}
2874
2875
2876/**
2877 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2878 */
2879IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2880{
2881#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2882 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2883#else
2884 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2885#endif
2886}
2887
2888
2889/*********************************************************************************************************************************
2890* Helpers: Commit, rollback & unmap *
2891*********************************************************************************************************************************/
2892
2893/**
2894 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2895 */
2896IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2897{
2898 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2899}
2900
2901
2902/**
2903 * Used by TB code to commit and unmap a read-write memory mapping.
2904 */
2905IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2906{
2907 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2908}
2909
2910
2911/**
2912 * Used by TB code to commit and unmap a write-only memory mapping.
2913 */
2914IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2915{
2916 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2917}
2918
2919
2920/**
2921 * Used by TB code to commit and unmap a read-only memory mapping.
2922 */
2923IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2924{
2925 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2926}
2927
2928
2929/**
2930 * Reinitializes the native recompiler state.
2931 *
2932 * Called before starting a new recompile job.
2933 */
2934static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2935{
2936 pReNative->cLabels = 0;
2937 pReNative->bmLabelTypes = 0;
2938 pReNative->cFixups = 0;
2939#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2940 pReNative->pDbgInfo->cEntries = 0;
2941#endif
2942 pReNative->pTbOrg = pTb;
2943 pReNative->cCondDepth = 0;
2944 pReNative->uCondSeqNo = 0;
2945 pReNative->uCheckIrqSeqNo = 0;
2946 pReNative->uTlbSeqNo = 0;
2947
2948#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2949 pReNative->Core.offPc = 0;
2950 pReNative->Core.cInstrPcUpdateSkipped = 0;
2951#endif
2952 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2953#if IEMNATIVE_HST_GREG_COUNT < 32
2954 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2955#endif
2956 ;
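    /* Example of the mask above on AMD64 (16 general registers):
       ~(RT_BIT(16) - 1) = 0xffff0000, so the non-existent host registers 16..31
       are permanently marked as allocated and can never be handed out, just like
       the fixed registers in IEMNATIVE_REG_FIXED_MASK. */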
2957 pReNative->Core.bmHstRegsWithGstShadow = 0;
2958 pReNative->Core.bmGstRegShadows = 0;
2959 pReNative->Core.bmVars = 0;
2960 pReNative->Core.bmStack = 0;
2961 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2962 pReNative->Core.u64ArgVars = UINT64_MAX;
2963
2964 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 13);
2965 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2966 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2967 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2968 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2969 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2970 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2971 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2972 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2973 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2974 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2975 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2976 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2977 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2978
2979 /* Full host register reinit: */
2980 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2981 {
2982 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2983 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2984 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2985 }
2986
2987 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2988 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2989#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2990 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2991#endif
2992#ifdef IEMNATIVE_REG_FIXED_TMP0
2993 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2994#endif
2995#ifdef IEMNATIVE_REG_FIXED_TMP1
2996 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2997#endif
2998#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2999 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3000#endif
3001 );
3002 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3003 {
3004 fRegs &= ~RT_BIT_32(idxReg);
3005        pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3006 }
3007
3008 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3009#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3010 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3011#endif
3012#ifdef IEMNATIVE_REG_FIXED_TMP0
3013 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3014#endif
3015#ifdef IEMNATIVE_REG_FIXED_TMP1
3016 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3017#endif
3018#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3019 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3020#endif
3021
3022#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3023# ifdef RT_ARCH_ARM64
3024 /*
3025     * Arm64 only has 32 128-bit registers.  In order to support emulating 256-bit registers we statically
3026     * pair two real registers into one virtual register for now, leaving us with only 16 256-bit registers.
3027     * We always pair v0 with v1, v2 with v3, etc., so we mark the higher register as fixed here during init
3028     * and the register allocator assumes it will always be free when the lower one is picked.
3029 */
3030 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
3031# else
3032 uint32_t const fFixedAdditional = 0;
3033# endif
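    /* For illustration: 0xaaaaaaaa is binary 1010...1010, i.e. every odd-numbered
       bit is set, so v1, v3, ..., v31 (the upper half of each 256-bit pair) are
       reserved here and only the even-numbered SIMD registers are handed out. */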
3034
3035 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3036 | fFixedAdditional
3037# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3038 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3039# endif
3040 ;
3041 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3042 pReNative->Core.bmGstSimdRegShadows = 0;
3043 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3044 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3045
3046 /* Full host register reinit: */
3047 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3048 {
3049 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3050 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3051 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3052 }
3053
3054 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3055 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3056 {
3057 fRegs &= ~RT_BIT_32(idxReg);
3058 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3059 }
3060
3061#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3062 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3063#endif
3064
3065#endif
3066
3067 return pReNative;
3068}
3069
3070
3071/**
3072 * Allocates and initializes the native recompiler state.
3073 *
3074 * This is called the first time an EMT wants to recompile something.
3075 *
3076 * @returns Pointer to the new recompiler state.
3077 * @param pVCpu The cross context virtual CPU structure of the calling
3078 * thread.
3079 * @param pTb The TB that's about to be recompiled.
3080 * @thread EMT(pVCpu)
3081 */
3082static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3083{
3084 VMCPU_ASSERT_EMT(pVCpu);
3085
3086 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3087 AssertReturn(pReNative, NULL);
3088
3089 /*
3090 * Try allocate all the buffers and stuff we need.
3091 */
3092 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3093 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3094 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3095#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3096 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3097#endif
3098 if (RT_LIKELY( pReNative->pInstrBuf
3099 && pReNative->paLabels
3100 && pReNative->paFixups)
3101#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3102 && pReNative->pDbgInfo
3103#endif
3104 )
3105 {
3106 /*
3107 * Set the buffer & array sizes on success.
3108 */
3109 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3110 pReNative->cLabelsAlloc = _8K;
3111 pReNative->cFixupsAlloc = _16K;
3112#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3113 pReNative->cDbgInfoAlloc = _16K;
3114#endif
3115
3116 /* Other constant stuff: */
3117 pReNative->pVCpu = pVCpu;
3118
3119 /*
3120 * Done, just need to save it and reinit it.
3121 */
3122 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3123 return iemNativeReInit(pReNative, pTb);
3124 }
3125
3126 /*
3127 * Failed. Cleanup and return.
3128 */
3129 AssertFailed();
3130 RTMemFree(pReNative->pInstrBuf);
3131 RTMemFree(pReNative->paLabels);
3132 RTMemFree(pReNative->paFixups);
3133#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3134 RTMemFree(pReNative->pDbgInfo);
3135#endif
3136 RTMemFree(pReNative);
3137 return NULL;
3138}
3139
3140
3141/**
3142 * Creates a label
3143 *
3144 * If the label does not yet have a defined position,
3145 * call iemNativeLabelDefine() later to set it.
3146 *
3147 * @returns Label ID. Throws VBox status code on failure, so no need to check
3148 * the return value.
3149 * @param pReNative The native recompile state.
3150 * @param enmType The label type.
3151 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3152 * label is not yet defined (default).
3153 * @param uData Data associated with the label. Only applicable to
3154 * certain types of labels. Default is zero.
3155 */
3156DECL_HIDDEN_THROW(uint32_t)
3157iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3158 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3159{
3160 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3161
3162 /*
3163 * Locate existing label definition.
3164 *
3165 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3166 * and uData is zero.
3167 */
3168 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3169 uint32_t const cLabels = pReNative->cLabels;
3170 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3171#ifndef VBOX_STRICT
3172 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3173 && offWhere == UINT32_MAX
3174 && uData == 0
3175#endif
3176 )
3177 {
3178#ifndef VBOX_STRICT
3179 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3180 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3181 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3182 if (idxLabel < pReNative->cLabels)
3183 return idxLabel;
3184#else
3185 for (uint32_t i = 0; i < cLabels; i++)
3186 if ( paLabels[i].enmType == enmType
3187 && paLabels[i].uData == uData)
3188 {
3189 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3190 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3191 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3192 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3193 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3194 return i;
3195 }
3196 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3197 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3198#endif
3199 }
3200
3201 /*
3202 * Make sure we've got room for another label.
3203 */
3204 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3205 { /* likely */ }
3206 else
3207 {
3208 uint32_t cNew = pReNative->cLabelsAlloc;
3209 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3210 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3211 cNew *= 2;
3212 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3213 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3214 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3215 pReNative->paLabels = paLabels;
3216 pReNative->cLabelsAlloc = cNew;
3217 }
3218
3219 /*
3220 * Define a new label.
3221 */
3222 paLabels[cLabels].off = offWhere;
3223 paLabels[cLabels].enmType = enmType;
3224 paLabels[cLabels].uData = uData;
3225 pReNative->cLabels = cLabels + 1;
3226
3227 Assert((unsigned)enmType < 64);
3228 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3229
3230 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3231 {
3232 Assert(uData == 0);
3233 pReNative->aidxUniqueLabels[enmType] = cLabels;
3234 }
3235
3236 if (offWhere != UINT32_MAX)
3237 {
3238#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3239 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3240 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3241#endif
3242 }
3243 return cLabels;
3244}
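/*
 * Illustrative usage sketch for the label API (assumes the unique label type
 * kIemNativeLabelType_Return and an emitter helper along the lines of
 * iemNativeEmitJmpToLabel; both names are taken on trust here):
 *
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return);
 *      off = iemNativeEmitJmpToLabel(pReNative, off, idxLabel);    // records a fixup, see iemNativeAddFixup
 *      ...
 *      iemNativeLabelDefine(pReNative, idxLabel, off);             // resolves the forward declaration
 */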
3245
3246
3247/**
3248 * Defines the location of an existing label.
3249 *
3250 * @param pReNative The native recompile state.
3251 * @param idxLabel The label to define.
3252 * @param offWhere The position.
3253 */
3254DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3255{
3256 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3257 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3258 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3259 pLabel->off = offWhere;
3260#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3261 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3262 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3263#endif
3264}
3265
3266
3267/**
3268 * Looks up a label.
3269 *
3270 * @returns Label ID if found, UINT32_MAX if not.
3271 */
3272static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3273 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3274{
3275 Assert((unsigned)enmType < 64);
3276 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3277 {
3278 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3279 return pReNative->aidxUniqueLabels[enmType];
3280
3281 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3282 uint32_t const cLabels = pReNative->cLabels;
3283 for (uint32_t i = 0; i < cLabels; i++)
3284 if ( paLabels[i].enmType == enmType
3285 && paLabels[i].uData == uData
3286 && ( paLabels[i].off == offWhere
3287 || offWhere == UINT32_MAX
3288 || paLabels[i].off == UINT32_MAX))
3289 return i;
3290 }
3291 return UINT32_MAX;
3292}
3293
3294
3295/**
3296 * Adds a fixup.
3297 *
3298 * @throws VBox status code (int) on failure.
3299 * @param pReNative The native recompile state.
3300 * @param offWhere The instruction offset of the fixup location.
3301 * @param idxLabel The target label ID for the fixup.
3302 * @param enmType The fixup type.
3303 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3304 */
3305DECL_HIDDEN_THROW(void)
3306iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3307 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3308{
3309 Assert(idxLabel <= UINT16_MAX);
3310 Assert((unsigned)enmType <= UINT8_MAX);
3311
3312 /*
3313 * Make sure we've room.
3314 */
3315 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3316 uint32_t const cFixups = pReNative->cFixups;
3317 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3318 { /* likely */ }
3319 else
3320 {
3321 uint32_t cNew = pReNative->cFixupsAlloc;
3322 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3323 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3324 cNew *= 2;
3325 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3326 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3327 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3328 pReNative->paFixups = paFixups;
3329 pReNative->cFixupsAlloc = cNew;
3330 }
3331
3332 /*
3333 * Add the fixup.
3334 */
3335 paFixups[cFixups].off = offWhere;
3336 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3337 paFixups[cFixups].enmType = enmType;
3338 paFixups[cFixups].offAddend = offAddend;
3339 pReNative->cFixups = cFixups + 1;
3340}
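/*
 * Illustrative sketch of how a fixup is resolved once all labels are defined;
 * the actual patching is architecture specific and the displacement base may
 * differ, this just shows the general math for a relative-branch style fixup:
 *
 *      PIEMNATIVEFIXUP const pFixup  = &pReNative->paFixups[i];
 *      int32_t const         offDisp = (int32_t)pReNative->paLabels[pFixup->idxLabel].off
 *                                    - (int32_t)pFixup->off + pFixup->offAddend;
 *      // write offDisp into the instruction stream at pFixup->off.
 */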
3341
3342
3343/**
3344 * Slow code path for iemNativeInstrBufEnsure.
3345 */
3346DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3347{
3348 /* Double the buffer size till we meet the request. */
3349 uint32_t cNew = pReNative->cInstrBufAlloc;
3350 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3351 do
3352 cNew *= 2;
3353 while (cNew < off + cInstrReq);
3354
3355 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3356#ifdef RT_ARCH_ARM64
3357 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3358#else
3359 uint32_t const cbMaxInstrBuf = _2M;
3360#endif
3361 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3362
3363 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3364 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3365
3366#ifdef VBOX_STRICT
3367 pReNative->offInstrBufChecked = off + cInstrReq;
3368#endif
3369 pReNative->cInstrBufAlloc = cNew;
3370 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3371}
3372
3373#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3374
3375/**
3376 * Grows the static debug info array used during recompilation.
3377 *
3378 * @returns Pointer to the new debug info block; throws VBox status code on
3379 * failure, so no need to check the return value.
3380 */
3381DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3382{
3383 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3384 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3385 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3386 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3387 pReNative->pDbgInfo = pDbgInfo;
3388 pReNative->cDbgInfoAlloc = cNew;
3389 return pDbgInfo;
3390}
3391
3392
3393/**
3394 * Adds a new, uninitialized debug info entry, returning a pointer to it.
3395 */
3396DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3397{
3398 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3399 { /* likely */ }
3400 else
3401 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3402 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3403}
3404
3405
3406/**
3407 * Debug Info: Adds a native offset record, if necessary.
3408 */
3409static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3410{
3411 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3412
3413 /*
3414 * Search backwards to see if we've got a similar record already.
3415 */
3416 uint32_t idx = pDbgInfo->cEntries;
3417 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3418 while (idx-- > idxStop)
3419 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3420 {
3421 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3422 return;
3423 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3424 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3425 break;
3426 }
3427
3428 /*
3429 * Add it.
3430 */
3431 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3432 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3433 pEntry->NativeOffset.offNative = off;
3434}
3435
3436
3437/**
3438 * Debug Info: Record info about a label.
3439 */
3440static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3441{
3442 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3443 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3444 pEntry->Label.uUnused = 0;
3445 pEntry->Label.enmLabel = (uint8_t)enmType;
3446 pEntry->Label.uData = uData;
3447}
3448
3449
3450/**
3451 * Debug Info: Record info about a threaded call.
3452 */
3453static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3454{
3455 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3456 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3457 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3458 pEntry->ThreadedCall.uUnused = 0;
3459 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3460}
3461
3462
3463/**
3464 * Debug Info: Record info about a new guest instruction.
3465 */
3466static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3467{
3468 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3469 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3470 pEntry->GuestInstruction.uUnused = 0;
3471 pEntry->GuestInstruction.fExec = fExec;
3472}
3473
3474
3475/**
3476 * Debug Info: Record info about guest register shadowing.
3477 */
3478static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3479 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
3480{
3481 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3482 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3483 pEntry->GuestRegShadowing.uUnused = 0;
3484 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3485 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3486 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3487}
3488
3489
3490# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3491/**
3492 * Debug Info: Record info about guest register shadowing.
3493 */
3494static void iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3495 uint8_t idxHstSimdReg = UINT8_MAX, uint8_t idxHstSimdRegPrev = UINT8_MAX)
3496{
3497 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3498 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3499 pEntry->GuestSimdRegShadowing.uUnused = 0;
3500 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3501 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3502 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3503}
3504# endif
3505
3506
3507# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3508/**
3509 * Debug Info: Record info about delayed RIP updates.
3510 */
3511static void iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3512{
3513 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3514 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3515 pEntry->DelayedPcUpdate.offPc = offPc;
3516 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3517}
3518# endif
3519
3520#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3521
3522
3523/*********************************************************************************************************************************
3524* Register Allocator *
3525*********************************************************************************************************************************/
3526
3527/**
3528 * Register parameter indexes (indexed by argument number).
3529 */
3530DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3531{
3532 IEMNATIVE_CALL_ARG0_GREG,
3533 IEMNATIVE_CALL_ARG1_GREG,
3534 IEMNATIVE_CALL_ARG2_GREG,
3535 IEMNATIVE_CALL_ARG3_GREG,
3536#if defined(IEMNATIVE_CALL_ARG4_GREG)
3537 IEMNATIVE_CALL_ARG4_GREG,
3538# if defined(IEMNATIVE_CALL_ARG5_GREG)
3539 IEMNATIVE_CALL_ARG5_GREG,
3540# if defined(IEMNATIVE_CALL_ARG6_GREG)
3541 IEMNATIVE_CALL_ARG6_GREG,
3542# if defined(IEMNATIVE_CALL_ARG7_GREG)
3543 IEMNATIVE_CALL_ARG7_GREG,
3544# endif
3545# endif
3546# endif
3547#endif
3548};
3549
3550/**
3551 * Call register masks indexed by argument count.
3552 */
3553DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3554{
3555 0,
3556 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3557 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3558 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3559 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3560 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3561#if defined(IEMNATIVE_CALL_ARG4_GREG)
3562 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3563 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3564# if defined(IEMNATIVE_CALL_ARG5_GREG)
3565 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3566 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3567# if defined(IEMNATIVE_CALL_ARG6_GREG)
3568 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3569 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3570 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3571# if defined(IEMNATIVE_CALL_ARG7_GREG)
3572 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3573 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3574 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3575# endif
3576# endif
3577# endif
3578#endif
3579};
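/* Example: g_afIemNativeCallRegs[2] is RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
   i.e. the registers consumed by a two-argument call, while g_aidxIemNativeCallRegs[1] is IEMNATIVE_CALL_ARG1_GREG. */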
3580
3581#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3582/**
3583 * BP offset of the stack argument slots.
3584 *
3585 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3586 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3587 */
3588DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3589{
3590 IEMNATIVE_FP_OFF_STACK_ARG0,
3591# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3592 IEMNATIVE_FP_OFF_STACK_ARG1,
3593# endif
3594# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3595 IEMNATIVE_FP_OFF_STACK_ARG2,
3596# endif
3597# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3598 IEMNATIVE_FP_OFF_STACK_ARG3,
3599# endif
3600};
3601AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3602#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3603
3604/**
3605 * Info about shadowed guest register values.
3606 * @see IEMNATIVEGSTREG
3607 */
3608static struct
3609{
3610 /** Offset in VMCPU. */
3611 uint32_t off;
3612 /** The field size. */
3613 uint8_t cb;
3614 /** Name (for logging). */
3615 const char *pszName;
3616} const g_aGstShadowInfo[] =
3617{
3618#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3619 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3620 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3621 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3622 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3623 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3624 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3625 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3626 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3627 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3628 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3629 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3630 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3631 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3632 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3633 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3634 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3635 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3636 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3637 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3638 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3639 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3640 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3641 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3642 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3643 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3644 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3645 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3646 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3647 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3648 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3649 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3650 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3651 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3652 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3653 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3654 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3655 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3656 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3657 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3658 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3659 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3660 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3661 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3662 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3663 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3664 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3665 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3666 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3667#undef CPUMCTX_OFF_AND_SIZE
3668};
3669AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3670
3671
3672/** Host CPU general purpose register names. */
3673DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3674{
3675#ifdef RT_ARCH_AMD64
3676 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3677#elif RT_ARCH_ARM64
3678 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3679 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3680#else
3681# error "port me"
3682#endif
3683};
3684
3685
3686DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3687 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3688{
3689 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3690
3691 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3692 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3693 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3694 return (uint8_t)idxReg;
3695}
3696
3697
3698#if 0 /* unused */
3699/**
3700 * Tries to locate a suitable register in the given register mask.
3701 *
3702 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3703 * failed.
3704 *
3705 * @returns Host register number on success, returns UINT8_MAX on failure.
3706 */
3707static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3708{
3709 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3710 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3711 if (fRegs)
3712 {
3713 /** @todo pick better here: */
3714 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3715
3716 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3717 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3718 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3719 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3720
3721 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3722 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3723 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3724 return idxReg;
3725 }
3726 return UINT8_MAX;
3727}
3728#endif /* unused */
3729
3730
3731/**
3732 * Locate a register, possibly freeing one up.
3733 *
3734 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3735 * failed.
3736 *
3737 * @returns Host register number on success. Returns UINT8_MAX if no registers
3738 * found, the caller is supposed to deal with this and raise an
3739 * allocation type specific status code (if desired).
3740 *
3741 * @throws VBox status code if we run into trouble spilling a variable or
3742 * recording debug info. Does NOT throw anything if we're out of
3743 * registers, though.
3744 */
3745static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3746 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3747{
3748 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3749 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3750 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3751
3752 /*
3753 * Try a freed register that's shadowing a guest register.
3754 */
3755 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3756 if (fRegs)
3757 {
3758 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3759
3760#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3761 /*
3762 * When we have liveness information, we use it to kick out all shadowed
3763 * guest registers that will not be needed any more in this TB. If we're
3764 * lucky, this may prevent us from ending up here again.
3765 *
3766 * Note! We must consider the previous entry here so we don't free
3767 * anything that the current threaded function requires (current
3768 * entry is produced by the next threaded function).
3769 */
3770 uint32_t const idxCurCall = pReNative->idxCurCall;
3771 if (idxCurCall > 0)
3772 {
3773 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3774
3775# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3776 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3777 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3778 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
3779#else
3780 /* Construct a mask of the registers not in the read or write state.
3781 Note! We could skip writes, if they aren't from us, as this is just
3782 a hack to prevent trashing registers that have just been written
3783 or will be written when we retire the current instruction. */
3784 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3785 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3786 & IEMLIVENESSBIT_MASK;
3787#endif
3788 /* Merge EFLAGS. */
3789 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3790 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3791 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3792 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3793 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3794
3795 /* If it matches any shadowed registers. */
3796 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3797 {
3798 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3799 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3800 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3801
3802 /* See if we've got any unshadowed registers we can return now. */
3803 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3804 if (fUnshadowedRegs)
3805 {
3806 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3807 return (fPreferVolatile
3808 ? ASMBitFirstSetU32(fUnshadowedRegs)
3809 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3810 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3811 - 1;
3812 }
3813 }
3814 }
3815#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3816
3817 unsigned const idxReg = (fPreferVolatile
3818 ? ASMBitFirstSetU32(fRegs)
3819 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3820 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3821 - 1;
3822
3823 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3824 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3825 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3826 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3827
3828 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3829 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3830 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3831 return idxReg;
3832 }
3833
3834 /*
3835 * Try free up a variable that's in a register.
3836 *
3837 * We do two rounds here, first evacuating variables that don't need to be
3838 * saved on the stack, then in the second round moving things to the stack.
3839 */
3840 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3841 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3842 {
3843 uint32_t fVars = pReNative->Core.bmVars;
3844 while (fVars)
3845 {
3846 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3847 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3848 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3849 && (RT_BIT_32(idxReg) & fRegMask)
3850 && ( iLoop == 0
3851 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3852 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3853 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3854 {
3855 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3856 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3857 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3858 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3859 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3860 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3861
3862 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3863 {
3864 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3865 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3866 }
3867
3868 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3869 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3870
3871 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3872 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3873 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3874 return idxReg;
3875 }
3876 fVars &= ~RT_BIT_32(idxVar);
3877 }
3878 }
3879
3880 return UINT8_MAX;
3881}
3882
3883
3884/**
3885 * Reassigns a variable to a different register specified by the caller.
3886 *
3887 * @returns The new code buffer position.
3888 * @param pReNative The native recompile state.
3889 * @param off The current code buffer position.
3890 * @param idxVar The variable index.
3891 * @param idxRegOld The old host register number.
3892 * @param idxRegNew The new host register number.
3893 * @param pszCaller The caller for logging.
3894 */
3895static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3896 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3897{
3898 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3899 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3900 RT_NOREF(pszCaller);
3901
3902 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3903
3904 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3905 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3906 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3907 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3908
3909 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3910 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3911 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3912 if (fGstRegShadows)
3913 {
3914 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3915 | RT_BIT_32(idxRegNew);
3916 while (fGstRegShadows)
3917 {
3918 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3919 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3920
3921 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3922 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3923 }
3924 }
3925
3926 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3927 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3928 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3929 return off;
3930}
3931
3932
3933/**
3934 * Moves a variable to a different register or spills it onto the stack.
3935 *
3936 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3937 * kinds can easily be recreated if needed later.
3938 *
3939 * @returns The new code buffer position.
3940 * @param pReNative The native recompile state.
3941 * @param off The current code buffer position.
3942 * @param idxVar The variable index.
3943 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3944 * call-volatile registers.
3945 */
3946static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3947 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3948{
3949 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3950 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3951 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3952 Assert(!pVar->fRegAcquired);
3953
3954 uint8_t const idxRegOld = pVar->idxReg;
3955 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3956 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3957 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3958 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3959 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3960 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3961 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3962 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3963
3964
3965 /** @todo Add statistics on this.*/
3966 /** @todo Implement basic variable liveness analysis (python) so variables
3967 * can be freed immediately once no longer used. Without that we risk
3968 * trashing registers and stack space on dead variables.
3969 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3970
3971 /*
3972 * First try move it to a different register, as that's cheaper.
3973 */
3974 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3975 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3976 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3977 if (fRegs)
3978 {
3979 /* Avoid using shadow registers, if possible. */
3980 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3981 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3982 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3983 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3984 }
3985
3986 /*
3987 * Otherwise we must spill the register onto the stack.
3988 */
3989 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3990 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3991 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3992 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3993
3994 pVar->idxReg = UINT8_MAX;
3995 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3996 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3997 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3998 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3999 return off;
4000}
4001
4002
4003/**
4004 * Allocates a temporary host general purpose register.
4005 *
4006 * This may emit code to save register content onto the stack in order to free
4007 * up a register.
4008 *
4009 * @returns The host register number; throws VBox status code on failure,
4010 * so no need to check the return value.
4011 * @param pReNative The native recompile state.
4012 * @param poff Pointer to the variable with the code buffer position.
4013 * This will be updated if we need to move a variable from
4014 * register to stack in order to satisfy the request.
4015 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4016 * registers (@c true, default) or the other way around
4017 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4018 */
4019DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4020{
4021 /*
4022 * Try find a completely unused register, preferably a call-volatile one.
4023 */
4024 uint8_t idxReg;
4025 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4026 & ~pReNative->Core.bmHstRegsWithGstShadow
4027 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4028 if (fRegs)
4029 {
4030 if (fPreferVolatile)
4031 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4032 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4033 else
4034 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4035 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4036 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4037 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4038 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4039 }
4040 else
4041 {
4042 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4043 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4044 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4045 }
4046 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4047}
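/*
 * Illustrative usage sketch (the free helper iemNativeRegFreeTmp and its
 * signature are assumed from the surrounding allocator API):
 *
 *      uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
 *      // ... emit code using idxRegTmp as a scratch register ...
 *      iemNativeRegFreeTmp(pReNative, idxRegTmp);
 */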
4048
4049
4050/**
4051 * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
4052 * registers.
4053 *
4054 * @returns The host register number; throws VBox status code on failure,
4055 * so no need to check the return value.
4056 * @param pReNative The native recompile state.
4057 * @param poff Pointer to the variable with the code buffer position.
4058 * This will be updated if we need to move a variable from
4059 * register to stack in order to satisfy the request.
4060 * @param fRegMask Mask of acceptable registers.
4061 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4062 * registers (@c true, default) or the other way around
4063 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4064 */
4065DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4066 bool fPreferVolatile /*= true*/)
4067{
4068 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4069 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4070
4071 /*
4072 * Try find a completely unused register, preferably a call-volatile one.
4073 */
4074 uint8_t idxReg;
4075 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4076 & ~pReNative->Core.bmHstRegsWithGstShadow
4077 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4078 & fRegMask;
4079 if (fRegs)
4080 {
4081 if (fPreferVolatile)
4082 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4083 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4084 else
4085 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4086 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4087 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4088 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4089 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4090 }
4091 else
4092 {
4093 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4094 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4095 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4096 }
4097 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4098}
4099
4100
4101/**
4102 * Allocates a temporary register for loading an immediate value into.
4103 *
4104 * This will emit code to load the immediate, unless there happens to be an
4105 * unused register with the value already loaded.
4106 *
4107 * The caller will not modify the returned register, it must be considered
4108 * read-only. Free using iemNativeRegFreeTmpImm.
4109 *
4110 * @returns The host register number; throws VBox status code on failure, so no
4111 * need to check the return value.
4112 * @param pReNative The native recompile state.
4113 * @param poff Pointer to the variable with the code buffer position.
4114 * @param uImm The immediate value that the register must hold upon
4115 * return.
4116 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4117 * registers (@c true, default) or the other way around
4118 * (@c false).
4119 *
4120 * @note Reusing immediate values has not been implemented yet.
4121 */
4122DECL_HIDDEN_THROW(uint8_t)
4123iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4124{
4125 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4126 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4127 return idxReg;
4128}
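/*
 * Illustrative usage sketch (the signature of the iemNativeRegFreeTmpImm helper
 * named in the doc comment above is assumed):
 *
 *      uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0x1000));
 *      // ... emit code that only reads idxRegImm ...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 */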
4129
4130#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4131
4132# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4133/**
4134 * Helper for iemNativeLivenessGetStateByGstReg.
4135 *
4136 * @returns IEMLIVENESS_STATE_XXX
4137 * @param fMergedStateExp2 This is the RT_BIT_32() of each sub-state
4138 * ORed together.
4139 */
4140DECL_FORCE_INLINE(uint32_t)
4141iemNativeLivenessMergeExpandedEFlagsState(uint32_t fMergedStateExp2)
4142{
4143 /* INPUT trumps anything else. */
4144 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_INPUT))
4145 return IEMLIVENESS_STATE_INPUT;
4146
4147 /* CLOBBERED trumps XCPT_OR_CALL and UNUSED. */
4148 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED))
4149 {
4150 /* If not all sub-fields are clobbered they must be considered INPUT. */
4151 if (fMergedStateExp2 & (RT_BIT_32(IEMLIVENESS_STATE_UNUSED) | RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL)))
4152 return IEMLIVENESS_STATE_INPUT;
4153 return IEMLIVENESS_STATE_CLOBBERED;
4154 }
4155
4156 /* XCPT_OR_CALL trumps UNUSED. */
4157 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL))
4158 return IEMLIVENESS_STATE_XCPT_OR_CALL;
4159
4160 return IEMLIVENESS_STATE_UNUSED;
4161}
4162# endif /* !IEMLIVENESS_EXTENDED_LAYOUT */
4163
4164
4165DECL_FORCE_INLINE(uint32_t)
4166iemNativeLivenessGetStateByGstRegEx(PCIEMLIVENESSENTRY pLivenessEntry, unsigned enmGstRegEx)
4167{
4168# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4169 return ((pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4170 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2);
4171# else
4172 return ( (pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4173 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2)
4174 | (((pLivenessEntry->Bit2.bm64 >> enmGstRegEx) << 2) & 4)
4175 | (((pLivenessEntry->Bit3.bm64 >> enmGstRegEx) << 3) & 8);
4176# endif
4177}
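/*
 * Packing note: the state is assembled from the parallel Bit0..BitN bitmaps, so
 * for register index r it is Bit0[r] | (Bit1[r] << 1) | ...; e.g. Bit0[r]=1 and
 * Bit1[r]=0 yields state 1, which the non-extended layout defines as
 * IEMLIVENESS_STATE_UNUSED (see the AssertCompile in iemNativeRegAllocFindFree).
 */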
4178
4179
4180DECL_FORCE_INLINE(uint32_t)
4181iemNativeLivenessGetStateByGstReg(PCIEMLIVENESSENTRY pLivenessEntry, IEMNATIVEGSTREG enmGstReg)
4182{
4183 uint32_t uRet = iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, (unsigned)enmGstReg);
4184 if (enmGstReg == kIemNativeGstReg_EFlags)
4185 {
4186 /* Merge the eflags states to one. */
4187# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4188 uRet = RT_BIT_32(uRet);
4189 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflCf | (pLivenessEntry->Bit1.fEflCf << 1));
4190 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflPf | (pLivenessEntry->Bit1.fEflPf << 1));
4191 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflAf | (pLivenessEntry->Bit1.fEflAf << 1));
4192 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflZf | (pLivenessEntry->Bit1.fEflZf << 1));
4193 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflSf | (pLivenessEntry->Bit1.fEflSf << 1));
4194 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflOf | (pLivenessEntry->Bit1.fEflOf << 1));
4195 uRet = iemNativeLivenessMergeExpandedEFlagsState(uRet);
4196# else
4197 AssertCompile(IEMLIVENESSBIT_IDX_EFL_OTHER == (unsigned)kIemNativeGstReg_EFlags);
4198 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_CF);
4199 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_PF);
4200 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_AF);
4201 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_ZF);
4202 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_SF);
4203 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_OF);
4204# endif
4205 }
4206 return uRet;
4207}
4208
4209
4210# ifdef VBOX_STRICT
4211 /** For assertions only; the caller checks that idxCurCall isn't zero. */
4212DECL_FORCE_INLINE(uint32_t)
4213iemNativeLivenessGetPrevStateByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
4214{
4215 return iemNativeLivenessGetStateByGstReg(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], enmGstReg);
4216}
4217# endif /* VBOX_STRICT */
4218
4219#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4220
4221/**
4222 * Marks host register @a idxHstReg as containing a shadow copy of guest
4223 * register @a enmGstReg.
4224 *
4225 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
4226 * host register before calling.
4227 */
4228DECL_FORCE_INLINE(void)
4229iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4230{
4231 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
4232 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4233 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
4234
4235 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
4236 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
4237 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
4238 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4239#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4240 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4241 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
4242#else
4243 RT_NOREF(off);
4244#endif
4245}
4246
4247
4248/**
4249 * Clear any guest register shadow claims from @a idxHstReg.
4250 *
4251 * The register does not need to be shadowing any guest registers.
4252 */
4253DECL_FORCE_INLINE(void)
4254iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
4255{
4256 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4257 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4258 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4259 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4260 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4261
4262#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4263 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4264 if (fGstRegs)
4265 {
4266 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
4267 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4268 while (fGstRegs)
4269 {
4270 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4271 fGstRegs &= ~RT_BIT_64(iGstReg);
4272 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
4273 }
4274 }
4275#else
4276 RT_NOREF(off);
4277#endif
4278
4279 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4280 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4281 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4282}
4283
4284
4285/**
4286 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
4287 * and global overview flags.
4288 */
4289DECL_FORCE_INLINE(void)
4290iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4291{
4292 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4293 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4294 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4295 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4296 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
4297 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4298 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4299
4300#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4301 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4302 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
4303#else
4304 RT_NOREF(off);
4305#endif
4306
4307 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4308 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4309 if (!fGstRegShadowsNew)
4310 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4311 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
4312}
4313
4314
4315#if 0 /* unused */
4316/**
4317 * Clear any guest register shadow claim for @a enmGstReg.
4318 */
4319DECL_FORCE_INLINE(void)
4320iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4321{
4322 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4323 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4324 {
4325 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
4326 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4327 }
4328}
4329#endif
4330
4331
4332/**
4333 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
4334 * as the new shadow of it.
4335 *
4336 * Unlike the other guest reg shadow helpers, this does the logging for you.
4337 * However, it is the liveness state is not asserted here, the caller must do
4338 * However, the liveness state is not asserted here; the caller must do
4339 */
4340DECL_FORCE_INLINE(void)
4341iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
4342 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4343{
4344 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4345 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4346 {
4347 uint8_t const idxHstRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
4348 Assert(idxHstRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4349 if (idxHstRegOld == idxHstRegNew)
4350 return;
4351 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s (from %s)\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4352 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstRegOld]));
4353 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4354 }
4355 else
4356 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4357 g_aGstShadowInfo[enmGstReg].pszName));
4358 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
4359}
4360
4361
4362/**
4363 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
4364 * to @a idxRegTo.
4365 */
4366DECL_FORCE_INLINE(void)
4367iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
4368 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4369{
4370 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
4371 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
4372 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
4373 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
4374 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4375 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
4376 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
4377 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
4378 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
4379
4380 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4381 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
4382 if (!fGstRegShadowsFrom)
4383 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
4384 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
4385 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
4386 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
4387#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4388 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4389 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
4390#else
4391 RT_NOREF(off);
4392#endif
4393}
4394
4395
4396/**
4397 * Allocates a temporary host general purpose register for keeping a guest
4398 * register value.
4399 *
4400 * Since we may already have a register holding the guest register value,
4401 * code will be emitted to do the loading if that's not the case. Code may also
4402 * be emitted if we have to free up a register to satisfy the request.
4403 *
4404 * @returns The host register number; throws VBox status code on failure, so no
4405 * need to check the return value.
4406 * @param pReNative The native recompile state.
4407 * @param poff Pointer to the variable with the code buffer
4408 * position. This will be updated if we need to move a
4409 * variable from register to stack in order to satisfy
4410 * the request.
4411 * @param enmGstReg The guest register that is to be updated.
4412 * @param enmIntendedUse How the caller will be using the host register.
4413 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4414 * register is okay (default). The ASSUMPTION here is
4415 * that the caller has already flushed all volatile
4416 * registers, so this is only applied if we allocate a
4417 * new register.
4418 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4419 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4420 */
4421DECL_HIDDEN_THROW(uint8_t)
4422iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4423 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4424 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4425{
4426 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4427#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4428 AssertMsg( fSkipLivenessAssert
4429 || pReNative->idxCurCall == 0
4430 || enmGstReg == kIemNativeGstReg_Pc
4431 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4432 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4433 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4434 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4435 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4436 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4437#endif
4438 RT_NOREF(fSkipLivenessAssert);
4439#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4440 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4441#endif
4442 uint32_t const fRegMask = !fNoVolatileRegs
4443 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4444 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4445
4446 /*
4447 * First check if the guest register value is already in a host register.
4448 */
4449 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4450 {
4451 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4452 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4453 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4454 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4455
4456 /* It's not supposed to be allocated... */
4457 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4458 {
4459 /*
4460 * If the register will trash the guest shadow copy, try to find a
4461 * completely unused register we can use instead. If that fails,
4462 * we need to disassociate the host reg from the guest reg.
4463 */
4464 /** @todo would be nice to know if preserving the register is in any way helpful. */
4465 /* If the purpose is calculations, try to duplicate the register value as
4466 we'll be clobbering the shadow. */
4467 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4468 && ( ~pReNative->Core.bmHstRegs
4469 & ~pReNative->Core.bmHstRegsWithGstShadow
4470 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4471 {
4472 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4473
4474 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4475
4476 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4477 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4478 g_apszIemNativeHstRegNames[idxRegNew]));
4479 idxReg = idxRegNew;
4480 }
4481 /* If the current register matches the restrictions, go ahead and allocate
4482 it for the caller. */
4483 else if (fRegMask & RT_BIT_32(idxReg))
4484 {
4485 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4486 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4487 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4488 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4489 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4490 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4491 else
4492 {
4493 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4494 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4495 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4496 }
4497 }
4498 /* Otherwise, allocate a register that satisfies the caller and transfer
4499 the shadowing if compatible with the intended use. (This basically
4500 means the caller wants a non-volatile register (RSP push/pop scenario).) */
4501 else
4502 {
4503 Assert(fNoVolatileRegs);
4504 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4505 !fNoVolatileRegs
4506 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4507 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4508 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4509 {
4510 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4511 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4512 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4513 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4514 }
4515 else
4516 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4517 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4518 g_apszIemNativeHstRegNames[idxRegNew]));
4519 idxReg = idxRegNew;
4520 }
4521 }
4522 else
4523 {
4524 /*
4525 * Oops. Shadowed guest register already allocated!
4526 *
4527 * Allocate a new register, copy the value and, if updating, the
4528 * guest shadow copy assignment to the new register.
4529 */
4530 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4531 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4532 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4533 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4534
4535 /** @todo share register for readonly access. */
4536 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4537 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4538
4539 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4540 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4541
4542 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4543 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4544 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4545 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4546 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4547 else
4548 {
4549 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4550 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4551 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4552 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4553 }
4554 idxReg = idxRegNew;
4555 }
4556 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4557
4558#ifdef VBOX_STRICT
4559 /* Strict builds: Check that the value is correct. */
4560 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4561#endif
4562
4563 return idxReg;
4564 }
4565
4566 /*
4567 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4568 */
4569 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4570
4571 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4572 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4573
4574 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4575 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4576 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4577 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4578
4579 return idxRegNew;
4580}
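
/*
 * Editor's illustrative sketch (not part of the build): a typical caller of
 * iemNativeRegAllocTmpForGuestReg(), modelled on the PC writeback code further
 * down in this file.  The helper names are real; the wrapper is hypothetical.
 */
#if 0
static uint32_t iemNativeSketchAddToGuestRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
{
    /* Get (or load) a host register shadowing the guest RIP, marked for update. */
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
                                                             kIemNativeGstRegUse_ForUpdate);
    /* Emit the update and write the result back to the guest context. */
    off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, cbInstr);
    off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));

    /* Free the temporary; the now up-to-date shadow copy stays associated. */
    iemNativeRegFreeTmp(pReNative, idxPcReg);
    return off;
}
#endif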
4581
4582
4583/**
4584 * Allocates a temporary host general purpose register that already holds the
4585 * given guest register value.
4586 *
4587 * The use case for this function is places where the shadowing state cannot be
4588 * modified due to branching and such. This will fail if we don't have a
4589 * current shadow copy handy or if it's incompatible. The only code that will
4590 * be emitted here is value checking code in strict builds.
4591 *
4592 * The intended use can only be readonly!
4593 *
4594 * @returns The host register number, UINT8_MAX if not present.
4595 * @param pReNative The native recompile state.
4596 * @param poff Pointer to the instruction buffer offset.
4597 * Will be updated in strict builds if a register is
4598 * found.
4599 * @param enmGstReg The guest register that is to be accessed (read-only).
4600 * @note In strict builds, this may throw instruction buffer growth failures.
4601 * Non-strict builds will not throw anything.
4602 * @sa iemNativeRegAllocTmpForGuestReg
4603 */
4604DECL_HIDDEN_THROW(uint8_t)
4605iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4606{
4607 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4608#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4609 AssertMsg( pReNative->idxCurCall == 0
4610 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4611 || enmGstReg == kIemNativeGstReg_Pc,
4612 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4613#endif
4614
4615 /*
4616 * First check if the guest register value is already in a host register.
4617 */
4618 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4619 {
4620 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4621 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4622 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4623 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4624
4625 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4626 {
4627 /*
4628 * We only do readonly use here, so easy compared to the other
4629 * variant of this code.
4630 */
4631 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4632 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4633 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4634 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4635 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4636
4637#ifdef VBOX_STRICT
4638 /* Strict builds: Check that the value is correct. */
4639 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4640#else
4641 RT_NOREF(poff);
4642#endif
4643 return idxReg;
4644 }
4645 }
4646
4647 return UINT8_MAX;
4648}
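
/*
 * Editor's illustrative sketch (not part of the build): how a caller that must
 * not disturb the shadowing state would use the helper above, with the
 * UINT8_MAX result signalling that a slower CPUMCTX-based path is required.
 * The wrapper function is hypothetical.
 */
#if 0
static uint32_t iemNativeSketchPeekAtPc(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxPcReg != UINT8_MAX)
    {
        /* ... emit read-only uses of idxPcReg here ... */
        iemNativeRegFreeTmp(pReNative, idxPcReg); /* the shadow association stays put */
    }
    /* else: fall back to code that reads the value straight from CPUMCTX. */
    return off;
}
#endif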
4649
4650
4651/**
4652 * Allocates argument registers for a function call.
4653 *
4654 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4655 * need to check the return value.
4656 * @param pReNative The native recompile state.
4657 * @param off The current code buffer offset.
4658 * @param cArgs The number of arguments the function call takes.
4659 */
4660DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4661{
4662 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4663 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4664 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4665 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4666
4667 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4668 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4669 else if (cArgs == 0)
4670 return off;
4671
4672 /*
4673 * Do we get lucky and all registers are free and not shadowing anything?
4674 */
4675 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4676 for (uint32_t i = 0; i < cArgs; i++)
4677 {
4678 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4679 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4680 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4681 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4682 }
4683 /*
4684 * Okay, not lucky so we have to free up the registers.
4685 */
4686 else
4687 for (uint32_t i = 0; i < cArgs; i++)
4688 {
4689 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4690 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4691 {
4692 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4693 {
4694 case kIemNativeWhat_Var:
4695 {
4696 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4697 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4698 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4699 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4700 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4701
4702 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4703 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4704 else
4705 {
4706 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4707 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4708 }
4709 break;
4710 }
4711
4712 case kIemNativeWhat_Tmp:
4713 case kIemNativeWhat_Arg:
4714 case kIemNativeWhat_rc:
4715 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4716 default:
4717 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4718 }
4719
4720 }
4721 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4722 {
4723 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4724 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4725 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4726 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4727 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4728 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4729 }
4730 else
4731 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4732 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4733 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4734 }
4735 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4736 return off;
4737}
4738
4739
4740DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4741
4742
4743#if 0
4744/**
4745 * Frees a register assignment of any type.
4746 *
4747 * @param pReNative The native recompile state.
4748 * @param idxHstReg The register to free.
4749 *
4750 * @note Does not update variables.
4751 */
4752DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4753{
4754 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4755 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4756 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4757 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4758 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4759 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4760 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4761 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4762 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4763 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4764 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4765 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4766 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4767 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4768
4769 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4770 /* no flushing, right:
4771 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4772 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4773 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4774 */
4775}
4776#endif
4777
4778
4779/**
4780 * Frees a temporary register.
4781 *
4782 * Any shadow copies of guest registers assigned to the host register will not
4783 * be flushed by this operation.
4784 */
4785DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4786{
4787 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4788 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4789 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4790 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4791 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4792}
4793
4794
4795/**
4796 * Frees a temporary immediate register.
4797 *
4798 * It is assumed that the caller has not modified the register, so it still holds
4799 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4800 */
4801DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4802{
4803 iemNativeRegFreeTmp(pReNative, idxHstReg);
4804}
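
/*
 * Editor's illustrative sketch (not part of the build): the typical pairing
 * with iemNativeRegAllocTmpImm(), which is named in the comment above but
 * defined earlier in the file; its exact signature is assumed here.
 */
#if 0
static uint32_t iemNativeSketchUseImmReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uImm)
{
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    /* ... use idxRegImm strictly as a read-only source operand ... */
    iemNativeRegFreeTmpImm(pReNative, idxRegImm); /* value unchanged, so the allocator may reuse it */
    return off;
}
#endif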
4805
4806
4807/**
4808 * Frees a register assigned to a variable.
4809 *
4810 * The register will be disassociated from the variable.
4811 */
4812DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4813{
4814 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4815 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4816 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4817 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4818 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4819
4820 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4821 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4822 if (!fFlushShadows)
4823 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4824 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4825 else
4826 {
4827 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4828 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4829 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4830 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4831 uint64_t fGstRegShadows = fGstRegShadowsOld;
4832 while (fGstRegShadows)
4833 {
4834 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4835 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4836
4837 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4838 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4839 }
4840 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4841 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4842 }
4843}
4844
4845
4846/**
4847 * Called right before emitting a call instruction to move anything important
4848 * out of call-volatile registers, free and flush the call-volatile registers,
4849 * optionally freeing argument variables.
4850 *
4851 * @returns New code buffer offset; throws VBox status code on failure.
4852 * @param pReNative The native recompile state.
4853 * @param off The code buffer offset.
4854 * @param cArgs The number of arguments the function call takes.
4855 * It is presumed that the host register part of these has
4856 * already been allocated as such and won't need moving,
4857 * just freeing.
4858 * @param fKeepVars Mask of variables that should keep their register
4859 * assignments. Caller must take care to handle these.
4860 */
4861DECL_HIDDEN_THROW(uint32_t)
4862iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4863{
4864 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4865
4866 /* fKeepVars will reduce this mask. */
4867 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4868
4869 /*
4870 * Move anything important out of volatile registers.
4871 */
4872 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4873 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4874 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4875#ifdef IEMNATIVE_REG_FIXED_TMP0
4876 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4877#endif
4878#ifdef IEMNATIVE_REG_FIXED_TMP1
4879 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4880#endif
4881#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4882 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4883#endif
4884 & ~g_afIemNativeCallRegs[cArgs];
4885
4886 fRegsToMove &= pReNative->Core.bmHstRegs;
4887 if (!fRegsToMove)
4888 { /* likely */ }
4889 else
4890 {
4891 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4892 while (fRegsToMove != 0)
4893 {
4894 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4895 fRegsToMove &= ~RT_BIT_32(idxReg);
4896
4897 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4898 {
4899 case kIemNativeWhat_Var:
4900 {
4901 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4902 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4903 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4904 Assert(pVar->idxReg == idxReg);
4905 if (!(RT_BIT_32(idxVar) & fKeepVars))
4906 {
4907 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4908 idxVar, pVar->enmKind, pVar->idxReg));
4909 if (pVar->enmKind != kIemNativeVarKind_Stack)
4910 pVar->idxReg = UINT8_MAX;
4911 else
4912 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4913 }
4914 else
4915 fRegsToFree &= ~RT_BIT_32(idxReg);
4916 continue;
4917 }
4918
4919 case kIemNativeWhat_Arg:
4920 AssertMsgFailed(("What?!?: %u\n", idxReg));
4921 continue;
4922
4923 case kIemNativeWhat_rc:
4924 case kIemNativeWhat_Tmp:
4925 AssertMsgFailed(("Missing free: %u\n", idxReg));
4926 continue;
4927
4928 case kIemNativeWhat_FixedTmp:
4929 case kIemNativeWhat_pVCpuFixed:
4930 case kIemNativeWhat_pCtxFixed:
4931 case kIemNativeWhat_PcShadow:
4932 case kIemNativeWhat_FixedReserved:
4933 case kIemNativeWhat_Invalid:
4934 case kIemNativeWhat_End:
4935 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4936 }
4937 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4938 }
4939 }
4940
4941 /*
4942 * Do the actual freeing.
4943 */
4944 if (pReNative->Core.bmHstRegs & fRegsToFree)
4945 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4946 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4947 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4948
4949 /* If there are guest register shadows in any call-volatile register, we
4950 have to clear the corresponding guest register masks for each register. */
4951 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4952 if (fHstRegsWithGstShadow)
4953 {
4954 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4955 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4956 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4957 do
4958 {
4959 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4960 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4961
4962 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4963 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4964 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4965 } while (fHstRegsWithGstShadow != 0);
4966 }
4967
4968 return off;
4969}
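
/*
 * Editor's illustrative sketch (not part of the build): the usual pre-call
 * sequence pairing iemNativeRegAllocArgs() with the helper above.  The call
 * emission itself is elided; the sketch assumes the argument allocator returns
 * the updated code buffer offset as documented.
 */
#if 0
static uint32_t iemNativeSketchPreHelperCall(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Claim the argument registers (may spill variables currently living there). */
    off = iemNativeRegAllocArgs(pReNative, off, 2 /*cArgs*/);

    /* Evacuate everything else from the call-volatile registers and drop the
       guest shadows they hold; the helper will clobber them anyway. */
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2 /*cArgs*/);

    /* ... load the two argument registers and emit the call itself ... */
    return off;
}
#endif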
4970
4971
4972/**
4973 * Flushes a set of guest register shadow copies.
4974 *
4975 * This is usually done after calling a threaded function or a C-implementation
4976 * of an instruction.
4977 *
4978 * @param pReNative The native recompile state.
4979 * @param fGstRegs Set of guest registers to flush.
4980 */
4981DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4982{
4983 /*
4984 * Reduce the mask by what's currently shadowed
4985 */
4986 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4987 fGstRegs &= bmGstRegShadowsOld;
4988 if (fGstRegs)
4989 {
4990 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4991 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4992 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4993 if (bmGstRegShadowsNew)
4994 {
4995 /*
4996 * Partial.
4997 */
4998 do
4999 {
5000 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5001 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5002 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5003 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5004 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5005
5006 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
5007 fGstRegs &= ~fInThisHstReg;
5008 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5009 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5010 if (!fGstRegShadowsNew)
5011 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5012 } while (fGstRegs != 0);
5013 }
5014 else
5015 {
5016 /*
5017 * Clear all.
5018 */
5019 do
5020 {
5021 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5022 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5023 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5024 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5025 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5026
5027 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5028 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5029 } while (fGstRegs != 0);
5030 pReNative->Core.bmHstRegsWithGstShadow = 0;
5031 }
5032 }
5033}
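
/*
 * Editor's illustrative sketch (not part of the build): after calling a C
 * implementation that may modify arbitrary guest registers, every shadow copy
 * is simply invalidated so subsequent code re-loads from CPUMCTX.
 */
#if 0
static void iemNativeSketchAfterCImplCall(PIEMRECOMPILERSTATE pReNative)
{
    iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
}
#endif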
5034
5035
5036/**
5037 * Flushes guest register shadow copies held by a set of host registers.
5038 *
5039 * This is used with the TLB lookup code for ensuring that we don't carry on
5040 * with any guest shadows in volatile registers, as these will get corrupted by
5041 * a TLB miss.
5042 *
5043 * @param pReNative The native recompile state.
5044 * @param fHstRegs Set of host registers to flush guest shadows for.
5045 */
5046DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
5047{
5048 /*
5049 * Reduce the mask by what's currently shadowed.
5050 */
5051 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
5052 fHstRegs &= bmHstRegsWithGstShadowOld;
5053 if (fHstRegs)
5054 {
5055 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
5056 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5057 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
5058 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
5059 if (bmHstRegsWithGstShadowNew)
5060 {
5061 /*
5062 * Partial (likely).
5063 */
5064 uint64_t fGstShadows = 0;
5065 do
5066 {
5067 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5068 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5069 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5070 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5071
5072 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5073 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5074 fHstRegs &= ~RT_BIT_32(idxHstReg);
5075 } while (fHstRegs != 0);
5076 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
5077 }
5078 else
5079 {
5080 /*
5081 * Clear all.
5082 */
5083 do
5084 {
5085 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5086 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5087 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5088 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5089
5090 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5091 fHstRegs &= ~RT_BIT_32(idxHstReg);
5092 } while (fHstRegs != 0);
5093 pReNative->Core.bmGstRegShadows = 0;
5094 }
5095 }
5096}
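
/*
 * Editor's illustrative sketch (not part of the build): on a code path that may
 * end up in a TLB-miss helper, guest shadows sitting in call-volatile registers
 * are forgotten up front so later code does not trust clobbered contents.
 */
#if 0
static void iemNativeSketchForgetVolatileShadows(PIEMRECOMPILERSTATE pReNative)
{
    iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
}
#endif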
5097
5098
5099/**
5100 * Restores guest shadow copies in volatile registers.
5101 *
5102 * This is used after calling a helper function (think TLB miss) to restore the
5103 * register state of volatile registers.
5104 *
5105 * @param pReNative The native recompile state.
5106 * @param off The code buffer offset.
5107 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5108 * be active (allocated) w/o asserting. Hack.
5109 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5110 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5111 */
5112DECL_HIDDEN_THROW(uint32_t)
5113iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5114{
5115 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5116 if (fHstRegs)
5117 {
5118 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5119 do
5120 {
5121 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5122
5123 /* It's not fatal if a register is active holding a variable that
5124 is shadowing a guest register, ASSUMING all pending guest register
5125 writes were flushed prior to the helper call. However, we'll be
5126 emitting duplicate restores, so it wastes code space. */
5127 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5128 RT_NOREF(fHstRegsActiveShadows);
5129
5130 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5131 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5132 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5133 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5134
5135 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5136 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5137
5138 fHstRegs &= ~RT_BIT_32(idxHstReg);
5139 } while (fHstRegs != 0);
5140 }
5141 return off;
5142}
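
/*
 * Editor's illustrative sketch (not part of the build): after a TLB-miss style
 * helper call the volatile registers keep their shadow markings in the
 * bookkeeping, but the helper has clobbered their contents, so the guest values
 * are re-loaded before the recompiled code continues.
 */
#if 0
static uint32_t iemNativeSketchPostHelperCall(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* ... the helper call was emitted just before this point ... */
    return iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
}
#endif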
5143
5144
5145#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5146# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
5147static uint32_t iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5148{
5149 /* Compare the shadow with the context value, they should match. */
5150 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
5151 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
5152 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
5153 return off;
5154}
5155# endif
5156
5157/**
5158 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5159 */
5160static uint32_t
5161iemNativeEmitPcWriteback(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5162{
5163 if (pReNative->Core.offPc)
5164 {
5165# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5166 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5167 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5168# endif
5169
5170# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5171 /* Allocate a temporary PC register. */
5172 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5173
5174 /* Perform the addition and store the result. */
5175 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5176 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5177
5178 /* Free but don't flush the PC register. */
5179 iemNativeRegFreeTmp(pReNative, idxPcReg);
5180# else
5181 /* Compare the shadow with the context value, they should match. */
5182 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5183 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5184# endif
5185
5186 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5187 pReNative->Core.offPc = 0;
5188 pReNative->Core.cInstrPcUpdateSkipped = 0;
5189 }
5190# if 0 /*def IEMNATIVE_WITH_TB_DEBUG_INFO*/
5191 else
5192 {
5193 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5194 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc);
5195 }
5196# endif
5197
5198 return off;
5199}
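
/*
 * Editor's illustrative sketch (not part of the build): with delayed PC
 * updating the instruction emitters merely bump pReNative->Core.offPc, so the
 * writeback above has to be emitted before anything can observe the guest RIP.
 */
# if 0
static uint32_t iemNativeSketchBeforeRipIsObserved(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    off = iemNativeEmitPcWriteback(pReNative, off); /* flush the accumulated RIP delta */
    /* ... emit code that reads cpum.GstCtx.rip or exits the TB ... */
    return off;
}
# endif
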
5200#endif
5201
5202
5203#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5204
5205
5206/*********************************************************************************************************************************
5207* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5208*********************************************************************************************************************************/
5209
5210/**
5211 * Info about shadowed guest SIMD register values.
5212 * @see IEMNATIVEGSTSIMDREG
5213 */
5214static struct
5215{
5216 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5217 uint32_t offXmm;
5218 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5219 uint32_t offYmm;
5220 /** Name (for logging). */
5221 const char *pszName;
5222} const g_aGstSimdShadowInfo[] =
5223{
5224#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5225 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5226 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5227 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5228 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5229 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5230 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5231 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5232 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5233 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5234 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5235 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5236 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5237 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5238 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5239 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5240 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5241 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5242#undef CPUMCTX_OFF_AND_SIZE
5243};
5244AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5245
5246
5247#ifdef LOG_ENABLED
5248/** Host CPU SIMD register names. */
5249DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
5250{
5251#ifdef RT_ARCH_AMD64
5252 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
5253 #elif defined(RT_ARCH_ARM64)
5254 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
5255 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
5256#else
5257# error "port me"
5258#endif
5259};
5260#endif
5261
5262
5263DECL_FORCE_INLINE(uint8_t) iemNativeSimdRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdReg,
5264 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
5265{
5266 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5267
5268 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = enmWhat;
5269 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
5270 RT_NOREF(idxVar);
5271 return idxSimdReg;
5272}
5273
5274
5275/**
5276 * Frees a temporary SIMD register.
5277 *
5278 * Any shadow copies of guest registers assigned to the host register will not
5279 * be flushed by this operation.
5280 */
5281DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5282{
5283 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5284 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5285 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5286 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5287 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5288}
5289
5290
5291/**
5292 * Emits code to flush a pending write of the given SIMD register if any, and also flushes the guest to host SIMD register association.
5293 *
5294 * @returns New code buffer offset.
5295 * @param pReNative The native recompile state.
5296 * @param off Current code buffer position.
5297 * @param enmGstSimdReg The guest SIMD register to flush.
5298 */
5299static uint32_t iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5300{
5301 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5302
5303 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5304 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5305 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5306 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5307
5308 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5309 {
5310 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5311 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5312 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5313 }
5314
5315 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5316 {
5317 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5318 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5319 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5320 }
5321
5322 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5323 return off;
5324}
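
/*
 * Editor's illustrative sketch (not part of the build): callers about to hand a
 * guest SIMD register to code that reads it from CPUMCTX flush any dirty shadow
 * halves first.  The wrapper is hypothetical.
 */
#if 0
static uint32_t iemNativeSketchSyncGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                IEMNATIVEGSTSIMDREG enmGstSimdReg)
{
    if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, enmGstSimdReg))
        off = iemNativeSimdRegFlushPendingWrite(pReNative, off, enmGstSimdReg);
    return off;
}
#endif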
5325
5326
5327/**
5328 * Locate a register, possibly freeing one up.
5329 *
5330 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5331 * failed.
5332 *
5333 * @returns Host register number on success. Returns UINT8_MAX if no registers
5334 * found; the caller is supposed to deal with this and raise an
5335 * allocation type specific status code (if desired).
5336 *
5337 * @throws VBox status code if we run into trouble spilling a variable or
5338 * recording debug info. Does NOT throw anything if we're out of
5339 * registers, though.
5340 */
5341static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5342 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5343{
5344 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
5345 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5346 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5347
5348 /*
5349 * Try a free register that's shadowing a guest register.
5350 */
5351 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5352 if (fRegs)
5353 {
5354 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
5355
5356#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5357 /*
5358 * When we have liveness information, we use it to kick out all shadowed
5359 * guest registers that will not be needed any more in this TB. If we're
5360 * lucky, this may prevent us from ending up here again.
5361 *
5362 * Note! We must consider the previous entry here so we don't free
5363 * anything that the current threaded function requires (current
5364 * entry is produced by the next threaded function).
5365 */
5366 uint32_t const idxCurCall = pReNative->idxCurCall;
5367 if (idxCurCall > 0)
5368 {
5369 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5370
5371# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5372 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5373 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5374 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
5375#else
5376 /* Construct a mask of the registers not in the read or write state.
5377 Note! We could skip writes, if they aren't from us, as this is just
5378 a hack to prevent trashing registers that have just been written
5379 or will be written when we retire the current instruction. */
5380 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5381 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5382 & IEMLIVENESSBIT_MASK;
5383#endif
5384 /* If it matches any shadowed registers. */
5385 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5386 {
5387 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
5388 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5389 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5390
5391 /* See if we've got any unshadowed registers we can return now. */
5392 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5393 if (fUnshadowedRegs)
5394 {
5395 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
5396 return (fPreferVolatile
5397 ? ASMBitFirstSetU32(fUnshadowedRegs)
5398 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5399 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5400 - 1;
5401 }
5402 }
5403 }
5404#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5405
5406 unsigned const idxReg = (fPreferVolatile
5407 ? ASMBitFirstSetU32(fRegs)
5408 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5409 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5410 - 1;
5411
5412 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5413 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5414 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5415 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5416 Assert(pReNative->Core.aHstSimdRegs[idxReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5417
5418 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5419 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5420 uint32_t idxGstSimdReg = 0;
5421 do
5422 {
5423 if (fGstRegShadows & 0x1)
5424 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5425 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5426 idxGstSimdReg++;
5427 fGstRegShadows >>= 1;
5428 } while (fGstRegShadows);
5429
5430 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5431 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5432 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5433 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5434 return idxReg;
5435 }
5436
5437 /*
5438 * Try to free up a variable that's in a register.
5439 *
5440 * We do two rounds here, first evacuating variables that don't need to be
5441 * saved on the stack, then in the second round moving things to the stack.
5442 */
5443 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
5444 AssertReleaseFailed(); /** @todo No variable support right now. */
5445#if 0
5446 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5447 {
5448 uint32_t fVars = pReNative->Core.bmSimdVars;
5449 while (fVars)
5450 {
5451 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5452 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
5453 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5454 && (RT_BIT_32(idxReg) & fRegMask)
5455 && ( iLoop == 0
5456 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
5457 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5458 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
5459 {
5460 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
5461 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5462 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5463 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5464 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
5465 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5466
5467 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5468 {
5469 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5470 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5471 }
5472
5473 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX;
5474 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5475
5476 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5477 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5478 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5479 return idxReg;
5480 }
5481 fVars &= ~RT_BIT_32(idxVar);
5482 }
5483 }
5484#endif
5485
5486 AssertFailed();
5487 return UINT8_MAX;
5488}
5489
5490
5491/**
5492 * Marks host SIMD register @a idxHstSimdReg as containing a shadow copy of guest
5493 * SIMD register @a enmGstSimdReg.
5494 *
5495 * ASSUMES that caller has made sure @a enmGstSimdReg is not associated with any
5496 * host register before calling.
5497 */
5498DECL_FORCE_INLINE(void)
5499iemNativeSimdRegMarkAsGstSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
5500{
5501 Assert(!(pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg)));
5502 Assert(!pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5503 Assert((unsigned)enmGstSimdReg < (unsigned)kIemNativeGstSimdReg_End);
5504
5505 pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] = idxHstSimdReg;
5506 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
5507 pReNative->Core.bmGstSimdRegShadows |= RT_BIT_64(enmGstSimdReg);
5508 pReNative->Core.bmHstSimdRegsWithGstShadow |= RT_BIT_32(idxHstSimdReg);
5509#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5510 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5511 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxHstSimdReg);
5512#else
5513 RT_NOREF(off);
5514#endif
5515}
5516
5517
5518/**
5519 * Transfers the guest SIMD register shadow claims of @a enmGstSimdReg from @a idxSimdRegFrom
5520 * to @a idxSimdRegTo.
5521 */
5522DECL_FORCE_INLINE(void)
5523iemNativeSimdRegTransferGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdRegFrom, uint8_t idxSimdRegTo,
5524 IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
5525{
5526 Assert(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5527 Assert(pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] == idxSimdRegFrom);
5528 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows)
5529 == pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows
5530 && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5531 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows)
5532 == pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows);
5533 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdRegFrom))
5534 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows));
5535 Assert( pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded
5536 == pReNative->Core.aHstSimdRegs[idxSimdRegTo].enmLoaded);
5537
5538
5539 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstSimdReg);
5540 pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows = fGstRegShadowsFrom;
5541 if (!fGstRegShadowsFrom)
5542 {
5543 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdRegFrom);
5544 pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5545 }
5546 pReNative->Core.bmHstSimdRegsWithGstShadow |= RT_BIT_32(idxSimdRegTo);
5547 pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
5548 pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] = idxSimdRegTo;
5549#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5550 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5551 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxSimdRegTo, idxSimdRegFrom);
5552#else
5553 RT_NOREF(off);
5554#endif
5555}
5556
5557
5558/**
5559 * Clear any guest register shadow claims from @a idxHstSimdReg.
5560 *
5561 * The register does not need to be shadowing any guest registers.
5562 */
5563DECL_FORCE_INLINE(void)
5564iemNativeSimdRegClearGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, uint32_t off)
5565{
5566 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5567 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows
5568 && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5569 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstSimdReg))
5570 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5571 Assert( !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyLo128)
5572 && !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyHi128));
5573
5574#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5575 uint64_t fGstRegs = pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5576 if (fGstRegs)
5577 {
5578 Assert(fGstRegs < RT_BIT_64(kIemNativeGstSimdReg_End));
5579 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5580 while (fGstRegs)
5581 {
5582 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5583 fGstRegs &= ~RT_BIT_64(iGstReg);
5584 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, (IEMNATIVEGSTSIMDREG)iGstReg, UINT8_MAX, idxHstSimdReg);
5585 }
5586 }
5587#else
5588 RT_NOREF(off);
5589#endif
5590
5591 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstSimdReg);
5592 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5593 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5594 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5595}
5596
5597
5598/**
5599 * Flushes a set of guest register shadow copies.
5600 *
5601 * This is usually done after calling a threaded function or a C-implementation
5602 * of an instruction.
5603 *
5604 * @param pReNative The native recompile state.
5605 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5606 */
5607DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5608{
5609 /*
5610 * Reduce the mask by what's currently shadowed
5611 */
5612 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5613 fGstSimdRegs &= bmGstSimdRegShadows;
5614 if (fGstSimdRegs)
5615 {
5616 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5617 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5618 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5619 if (bmGstSimdRegShadowsNew)
5620 {
5621 /*
5622 * Partial.
5623 */
5624 do
5625 {
5626 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5627 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5628 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5629 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5630 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5631 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5632
5633 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5634 fGstSimdRegs &= ~fInThisHstReg;
5635 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5636 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5637 if (!fGstRegShadowsNew)
5638 {
5639 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5640 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5641 }
5642 } while (fGstSimdRegs != 0);
5643 }
5644 else
5645 {
5646 /*
5647 * Clear all.
5648 */
5649 do
5650 {
5651 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5652 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5653 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5654 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5655 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5656 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5657
5658 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5659 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5660 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5661 } while (fGstSimdRegs != 0);
5662 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5663 }
5664 }
5665}
5666
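/*
 * Usage sketch (illustrative, not lifted from an actual caller): after emitting
 * a call to a helper that may modify XMM/YMM state directly in CPUMCTX, the
 * affected shadow copies must be dropped so the next access reloads them.
 * The guest SIMD register index 0 below is an arbitrary example.
 *
 *      iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(0)));
 */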
5667
5668/**
5669 * Allocates a temporary host SIMD register.
5670 *
5671 * This may emit code to save register content onto the stack in order to free
5672 * up a register.
5673 *
5674 * @returns The host register number; throws VBox status code on failure,
5675 * so no need to check the return value.
5676 * @param pReNative The native recompile state.
5677 * @param poff Pointer to the variable with the code buffer position.
5678 * This will be updated if we need to move a variable from
5679 * a register to the stack in order to satisfy the request.
5680 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5681 * registers (@c true, default) or the other way around
5682 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5683 */
5684DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5685{
5686 /*
5687 * Try find a completely unused register, preferably a call-volatile one.
5688 */
5689 uint8_t idxSimdReg;
5690 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5691 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5692 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5693 if (fRegs)
5694 {
5695 if (fPreferVolatile)
5696 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5697 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5698 else
5699 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5700 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5701 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5702 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5703 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5704 }
5705 else
5706 {
5707 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5708 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5709 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5710 }
5711
5712 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5713 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5714}
5715
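/*
 * Usage sketch (illustrative): grab a scratch SIMD register, emit the vector
 * code that needs it and hand it back.  Here idxOtherSimdReg stands in for
 * some previously allocated register, and the free routine is assumed to
 * follow the naming of the GPR allocator (iemNativeRegFreeTmp); it is not
 * shown in this part of the file.
 *
 *      uint8_t const idxTmpSimdReg = iemNativeSimdRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxTmpSimdReg, idxOtherSimdReg);
 *      ... emit code using idxTmpSimdReg ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxTmpSimdReg);   <- assumed counterpart, see note above
 */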
5716
5717/**
5718 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5719 * registers.
5720 *
5721 * @returns The host register number; throws VBox status code on failure,
5722 * so no need to check the return value.
5723 * @param pReNative The native recompile state.
5724 * @param poff Pointer to the variable with the code buffer position.
5725 * This will be updated if we need to move a variable from
5726 * a register to the stack in order to satisfy the request.
5727 * @param fRegMask Mask of acceptable registers.
5728 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5729 * registers (@c true, default) or the other way around
5730 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5731 */
5732DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5733 bool fPreferVolatile /*= true*/)
5734{
5735 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5736 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5737
5738 /*
5739 * Try find a completely unused register, preferably a call-volatile one.
5740 */
5741 uint8_t idxSimdReg;
5742 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5743 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5744 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5745 & fRegMask;
5746 if (fRegs)
5747 {
5748 if (fPreferVolatile)
5749 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5750 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5751 else
5752 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5753 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5754 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5755 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5756 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5757 }
5758 else
5759 {
5760 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5761 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5762 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5763 }
5764
5765 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5766 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5767}
5768
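/*
 * Usage sketch (illustrative): same as iemNativeSimdRegAllocTmp, but the caller
 * narrows the set of acceptable registers.  The mask below simply excludes the
 * fixed registers; real callers pick whatever subset suits the surrounding code.
 *
 *      uint8_t const idxTmpSimdReg = iemNativeSimdRegAllocTmpEx(pReNative, &off,
 *                                                               IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK);
 */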
5769
5770/**
5771 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5772 *
5773 * @param pReNative The native recompile state.
5774 * @param idxHstSimdReg The host SIMD register to update the state for.
5775 * @param enmLoadSz The load size to set.
5776 */
5777DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5778{
5779 /* Everything valid already? -> nothing to do. */
5780 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5781 return;
5782
5783 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5784 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5785 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5786 {
5787 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5788 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5789 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5790 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5791 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5792 }
5793}
5794
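/*
 * Example of the merging behaviour above (enum states, not code):
 *      Invalid + Low128   -> Low128
 *      Low128  + High128  -> 256
 *      High128 + Low128   -> 256
 *      256     + anything -> 256 (early return)
 * Any other combination trips the assertion.
 */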
5795
5796static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5797 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5798{
5799 /* Easy case first: either the destination loads the same range the source has already loaded, or the source has loaded everything. */
5800 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5801 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5802 {
5803# ifdef RT_ARCH_ARM64
5804 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5805 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5806# endif
5807
5808 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5809 {
5810 switch (enmLoadSzDst)
5811 {
5812 case kIemNativeGstSimdRegLdStSz_256:
5813 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5814 break;
5815 case kIemNativeGstSimdRegLdStSz_Low128:
5816 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5817 break;
5818 case kIemNativeGstSimdRegLdStSz_High128:
5819 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5820 break;
5821 default:
5822 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5823 }
5824
5825 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5826 }
5827 }
5828 else
5829 {
5830 /* Complicated stuff where the source is currently missing something, later. */
5831 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5832 }
5833
5834 return off;
5835}
5836
5837
5838/**
5839 * Allocates a temporary host SIMD register for keeping a guest
5840 * SIMD register value.
5841 *
5842 * Since we may already have a register holding the guest register value,
5843 * code will be emitted to do the loading if that's not the case. Code may also
5844 * be emitted if we have to free up a register to satisfy the request.
5845 *
5846 * @returns The host register number; throws VBox status code on failure, so no
5847 * need to check the return value.
5848 * @param pReNative The native recompile state.
5849 * @param poff Pointer to the variable with the code buffer
5850 * position. This will be updated if we need to move a
5851 * variable from a register to the stack in order to satisfy
5852 * the request.
5853 * @param enmGstSimdReg The guest SIMD register that is to be updated.
 * @param enmLoadSz Which part of the register needs to be valid (low/high 128 bits or the full 256 bits).
5854 * @param enmIntendedUse How the caller will be using the host register.
5855 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5856 * register is okay (default). The ASSUMPTION here is
5857 * that the caller has already flushed all volatile
5858 * registers, so this is only applied if we allocate a
5859 * new register.
5860 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5861 */
5862DECL_HIDDEN_THROW(uint8_t)
5863iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5864 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5865 bool fNoVolatileRegs /*= false*/)
5866{
5867 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5868#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5869 AssertMsg( pReNative->idxCurCall == 0
5870 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5871 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5872 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5873 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5874 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5875 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5876#endif
5877#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5878 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5879#endif
5880 uint32_t const fRegMask = !fNoVolatileRegs
5881 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5882 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5883
5884 /*
5885 * First check if the guest register value is already in a host register.
5886 */
5887 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5888 {
5889 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5890 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5891 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5892 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5893
5894 /* It's not supposed to be allocated... */
5895 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5896 {
5897 /*
5898 * If the register will trash the guest shadow copy, try find a
5899 * completely unused register we can use instead. If that fails,
5900 * we need to disassociate the host reg from the guest reg.
5901 */
5902 /** @todo would be nice to know if preserving the register is in any way helpful. */
5903 /* If the purpose is calculations, try duplicating the register value as
5904 we'll be clobbering the shadow. */
5905 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5906 && ( ~pReNative->Core.bmHstSimdRegs
5907 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5908 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5909 {
5910 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5911
5912 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5913
5914 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5915 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5916 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5917 idxSimdReg = idxRegNew;
5918 }
5919 /* If the current register matches the restrictions, go ahead and allocate
5920 it for the caller. */
5921 else if (fRegMask & RT_BIT_32(idxSimdReg))
5922 {
5923 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5924 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5925 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5926 {
5927 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5928 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxSimdReg, idxSimdReg, enmLoadSz);
5929 else
5930 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5931 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5932 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5933 }
5934 else
5935 {
5936 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5937 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5938 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5939 }
5940 }
5941 /* Otherwise, allocate a register that satisfies the caller and transfer
5942 the shadowing if compatible with the intended use. (This basically
5943 means the caller wants a non-volatile register (RSP push/pop scenario).) */
5944 else
5945 {
5946 Assert(fNoVolatileRegs);
5947 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5948 !fNoVolatileRegs
5949 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5950 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5951 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5952 {
5953 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5954 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5955 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5956 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5957 }
5958 else
5959 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5960 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5961 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5962 idxSimdReg = idxRegNew;
5963 }
5964 }
5965 else
5966 {
5967 /*
5968 * Oops. Shadowed guest register already allocated!
5969 *
5970 * Allocate a new register, copy the value and, if updating, the
5971 * guest shadow copy assignment to the new register.
5972 */
5973 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5974 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5975 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5976 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5977
5978 /** @todo share register for readonly access. */
5979 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5980 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5981
5982 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5983 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5984 else
5985 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5986
5987 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5988 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5989 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5990 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5991 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5992 else
5993 {
5994 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5995 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5996 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5997 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5998 }
5999 idxSimdReg = idxRegNew;
6000 }
6001 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
6002
6003#ifdef VBOX_STRICT
6004 /* Strict builds: Check that the value is correct. */
6005 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6006 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
6007#endif
6008
6009 return idxSimdReg;
6010 }
6011
6012 /*
6013 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
6014 */
6015 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
6016
6017 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6018 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
6019 else
6020 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
6021
6022 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
6023 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
6024
6025 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
6026 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
6027
6028 return idxRegNew;
6029}
6030
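/*
 * Usage sketch (illustrative): fetch the low 128 bits of guest XMM1 read-only
 * for use as a source operand in an emitter; the guest register enum value is
 * formed with the IEMNATIVEGSTSIMDREG_SIMD macro and the index 1 is an
 * arbitrary example.
 *
 *      uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
 *                                                                            IEMNATIVEGSTSIMDREG_SIMD(1),
 *                                                                            kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                            kIemNativeGstRegUse_ReadOnly);
 */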
6031#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6032
6033
6034
6035/*********************************************************************************************************************************
6036* Code emitters for flushing pending guest register writes and sanity checks *
6037*********************************************************************************************************************************/
6038
6039/**
6040 * Flushes delayed write of a specific guest register.
6041 *
6042 * This must be called prior to calling CImpl functions and any helpers that use
6043 * the guest state (like raising exceptions) and such.
6044 *
6045 * This optimization has not yet been implemented. The first target would be
6046 * RIP updates, since these are the most common ones.
6047 */
6048DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6049 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
6050{
6051#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6052 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
6053#endif
6054
6055#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6056 if ( enmClass == kIemNativeGstRegRef_XReg
6057 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
6058 {
6059 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
6060 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now, that the referenced register doesn't change). */
6061 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
6062
6063 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
6064 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
6065 }
6066#endif
6067 RT_NOREF(pReNative, enmClass, idxReg);
6068 return off;
6069}
6070
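/*
 * Usage sketch (illustrative): before handing out a reference to guest XMM0
 * (kIemNativeGstRegRef_XReg; the index 0 is an arbitrary example), make sure
 * any pending shadow writes for it have reached CPUMCTX:
 *
 *      off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, 0);
 */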
6071
6072/**
6073 * Flushes any delayed guest register writes.
6074 *
6075 * This must be called prior to calling CImpl functions and any helpers that use
6076 * the guest state (like raising exceptions) and such.
6077 *
6078 * This optimization has not yet been implemented. The first target would be
6079 * RIP updates, since these are the most common ones.
6080 */
6081DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept /*= 0*/,
6082 bool fFlushShadows /*= true*/)
6083{
6084#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6085 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
6086 off = iemNativeEmitPcWriteback(pReNative, off);
6087#else
6088 RT_NOREF(pReNative, fGstShwExcept);
6089#endif
6090
6091#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6092 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
6093 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
6094 {
6095 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
6096 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
6097
6098 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
6099 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
6100
6101 if ( fFlushShadows
6102 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
6103 {
6104 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
6105
6106 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
6107 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
6108 }
6109 }
6110#else
6111 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
6112#endif
6113
6114 return off;
6115}
6116
6117
6118#ifdef VBOX_STRICT
6119/**
6120 * Does internal register allocator sanity checks.
6121 */
6122static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
6123{
6124 /*
6125 * Iterate host registers building a guest shadowing set.
6126 */
6127 uint64_t bmGstRegShadows = 0;
6128 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
6129 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
6130 while (bmHstRegsWithGstShadow)
6131 {
6132 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
6133 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
6134 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
6135
6136 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
6137 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
6138 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
6139 bmGstRegShadows |= fThisGstRegShadows;
6140 while (fThisGstRegShadows)
6141 {
6142 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
6143 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
6144 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
6145 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
6146 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
6147 }
6148 }
6149 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
6150 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
6151 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
6152
6153 /*
6154 * Now the other way around, checking the guest to host index array.
6155 */
6156 bmHstRegsWithGstShadow = 0;
6157 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
6158 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
6159 while (bmGstRegShadows)
6160 {
6161 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
6162 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
6163 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
6164
6165 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6166 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
6167 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
6168 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
6169 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
6170 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
6171 }
6172 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
6173 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
6174 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
6175}
6176#endif
6177
6178
6179/*********************************************************************************************************************************
6180* Code Emitters (larger snippets) *
6181*********************************************************************************************************************************/
6182
6183/**
6184 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6185 * extending to 64-bit width.
6186 *
6187 * @returns New code buffer offset on success, UINT32_MAX on failure.
6188 * @param pReNative The native recompile state.
6189 * @param off The current code buffer position.
6190 * @param idxHstReg The host register to load the guest register value into.
6191 * @param enmGstReg The guest register to load.
6192 *
6193 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6194 * that is something the caller needs to do if applicable.
6195 */
6196DECL_HIDDEN_THROW(uint32_t)
6197iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6198{
6199 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
6200 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6201
6202 switch (g_aGstShadowInfo[enmGstReg].cb)
6203 {
6204 case sizeof(uint64_t):
6205 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6206 case sizeof(uint32_t):
6207 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6208 case sizeof(uint16_t):
6209 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6210#if 0 /* not present in the table. */
6211 case sizeof(uint8_t):
6212 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6213#endif
6214 default:
6215 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6216 }
6217}
6218
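/*
 * Example (this is the pattern the strict-build value checks below use): load
 * the guest PC into the fixed temporary register without touching the shadow
 * bookkeeping:
 *
 *      off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, kIemNativeGstReg_Pc);
 */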
6219
6220#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6221/**
6222 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6223 *
6224 * @returns New code buffer offset on success, UINT32_MAX on failure.
6225 * @param pReNative The recompiler state.
6226 * @param off The current code buffer position.
6227 * @param idxHstSimdReg The host register to load the guest register value into.
6228 * @param enmGstSimdReg The guest register to load.
6229 * @param enmLoadSz The load size of the register.
6230 *
6231 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
6232 * that is something the caller needs to do if applicable.
6233 */
6234DECL_HIDDEN_THROW(uint32_t)
6235iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6236 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6237{
6238 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6239
6240 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
6241 switch (enmLoadSz)
6242 {
6243 case kIemNativeGstSimdRegLdStSz_256:
6244 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6245 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6246 case kIemNativeGstSimdRegLdStSz_Low128:
6247 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6248 case kIemNativeGstSimdRegLdStSz_High128:
6249 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6250 default:
6251 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6252 }
6253}
6254#endif
6255
6256#ifdef VBOX_STRICT
6257/**
6258 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6259 *
6260 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6261 * Trashes EFLAGS on AMD64.
6262 */
6263static uint32_t
6264iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6265{
6266# ifdef RT_ARCH_AMD64
6267 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6268
6269 /* rol reg64, 32 */
6270 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6271 pbCodeBuf[off++] = 0xc1;
6272 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6273 pbCodeBuf[off++] = 32;
6274
6275 /* test reg32, ffffffffh */
6276 if (idxReg >= 8)
6277 pbCodeBuf[off++] = X86_OP_REX_B;
6278 pbCodeBuf[off++] = 0xf7;
6279 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6280 pbCodeBuf[off++] = 0xff;
6281 pbCodeBuf[off++] = 0xff;
6282 pbCodeBuf[off++] = 0xff;
6283 pbCodeBuf[off++] = 0xff;
6284
6285 /* je/jz +1 */
6286 pbCodeBuf[off++] = 0x74;
6287 pbCodeBuf[off++] = 0x01;
6288
6289 /* int3 */
6290 pbCodeBuf[off++] = 0xcc;
6291
6292 /* rol reg64, 32 */
6293 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6294 pbCodeBuf[off++] = 0xc1;
6295 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6296 pbCodeBuf[off++] = 32;
6297
6298# elif defined(RT_ARCH_ARM64)
6299 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6300 /* lsr tmp0, reg64, #32 */
6301 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6302 /* cbz tmp0, +1 */
6303 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6304 /* brk #0x1100 */
6305 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6306
6307# else
6308# error "Port me!"
6309# endif
6310 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6311 return off;
6312}
6313#endif /* VBOX_STRICT */
6314
6315
6316#ifdef VBOX_STRICT
6317/**
6318 * Emitting code that checks that the content of register @a idxReg is the same
6319 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6320 * instruction if that's not the case.
6321 *
6322 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6323 * Trashes EFLAGS on AMD64.
6324 */
6325static uint32_t
6326iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6327{
6328# ifdef RT_ARCH_AMD64
6329 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6330
6331 /* cmp reg, [mem] */
6332 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6333 {
6334 if (idxReg >= 8)
6335 pbCodeBuf[off++] = X86_OP_REX_R;
6336 pbCodeBuf[off++] = 0x38;
6337 }
6338 else
6339 {
6340 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6341 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6342 else
6343 {
6344 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6345 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6346 else
6347 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6348 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6349 if (idxReg >= 8)
6350 pbCodeBuf[off++] = X86_OP_REX_R;
6351 }
6352 pbCodeBuf[off++] = 0x39;
6353 }
6354 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6355
6356 /* je/jz +1 */
6357 pbCodeBuf[off++] = 0x74;
6358 pbCodeBuf[off++] = 0x01;
6359
6360 /* int3 */
6361 pbCodeBuf[off++] = 0xcc;
6362
6363 /* For values smaller than the register size, we must check that the rest
6364 of the register is all zeros. */
6365 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6366 {
6367 /* test reg64, imm32 */
6368 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6369 pbCodeBuf[off++] = 0xf7;
6370 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6371 pbCodeBuf[off++] = 0;
6372 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6373 pbCodeBuf[off++] = 0xff;
6374 pbCodeBuf[off++] = 0xff;
6375
6376 /* je/jz +1 */
6377 pbCodeBuf[off++] = 0x74;
6378 pbCodeBuf[off++] = 0x01;
6379
6380 /* int3 */
6381 pbCodeBuf[off++] = 0xcc;
6382 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6383 }
6384 else
6385 {
6386 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6387 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6388 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6389 }
6390
6391# elif defined(RT_ARCH_ARM64)
6392 /* mov TMP0, [gstreg] */
6393 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6394
6395 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6396 /* sub tmp0, tmp0, idxReg */
6397 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6398 /* cbz tmp0, +1 */
6399 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6400 /* brk #0x1000+enmGstReg */
6401 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6402 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6403
6404# else
6405# error "Port me!"
6406# endif
6407 return off;
6408}
6409
6410
6411# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6412/**
6413 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6414 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6415 * instruction if that's not the case.
6416 *
6417 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6418 * Trashes EFLAGS on AMD64.
6419 */
6420static uint32_t
6421iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg,
6422 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6423{
6424 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6425 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6426 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6427 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6428 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6429 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6430 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6431 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6432 return off;
6433
6434# ifdef RT_ARCH_AMD64
6435 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128); /** @todo 256-bit variant. */
6436
6437 /* movdqa vectmp0, idxSimdReg */
6438 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6439
6440 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6441
6442 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6443 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6444 if (idxSimdReg >= 8)
6445 pbCodeBuf[off++] = X86_OP_REX_R;
6446 pbCodeBuf[off++] = 0x0f;
6447 pbCodeBuf[off++] = 0x38;
6448 pbCodeBuf[off++] = 0x29;
6449 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6450
6451 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6452 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6453 pbCodeBuf[off++] = X86_OP_REX_W
6454 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6455 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6456 pbCodeBuf[off++] = 0x0f;
6457 pbCodeBuf[off++] = 0x3a;
6458 pbCodeBuf[off++] = 0x16;
6459 pbCodeBuf[off++] = 0xeb;
6460 pbCodeBuf[off++] = 0x00;
6461
6462 /* test tmp0, 0xffffffff. */
6463 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6464 pbCodeBuf[off++] = 0xf7;
6465 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, IEMNATIVE_REG_FIXED_TMP0 & 7);
6466 pbCodeBuf[off++] = 0xff;
6467 pbCodeBuf[off++] = 0xff;
6468 pbCodeBuf[off++] = 0xff;
6469 pbCodeBuf[off++] = 0xff;
6470
6471 /* je/jz +1 */
6472 pbCodeBuf[off++] = 0x74;
6473 pbCodeBuf[off++] = 0x01;
6474
6475 /* int3 */
6476 pbCodeBuf[off++] = 0xcc;
6477
6478 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6479 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6480 pbCodeBuf[off++] = X86_OP_REX_W
6481 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6482 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6483 pbCodeBuf[off++] = 0x0f;
6484 pbCodeBuf[off++] = 0x3a;
6485 pbCodeBuf[off++] = 0x16;
6486 pbCodeBuf[off++] = 0xeb;
6487 pbCodeBuf[off++] = 0x01;
6488
6489 /* test tmp0, 0xffffffff. */
6490 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6491 pbCodeBuf[off++] = 0xf7;
6492 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, IEMNATIVE_REG_FIXED_TMP0 & 7);
6493 pbCodeBuf[off++] = 0xff;
6494 pbCodeBuf[off++] = 0xff;
6495 pbCodeBuf[off++] = 0xff;
6496 pbCodeBuf[off++] = 0xff;
6497
6498 /* je/jz +1 */
6499 pbCodeBuf[off++] = 0x74;
6500 pbCodeBuf[off++] = 0x01;
6501
6502 /* int3 */
6503 pbCodeBuf[off++] = 0xcc;
6504
6505# elif defined(RT_ARCH_ARM64)
6506 /* mov vectmp0, [gstreg] */
6507 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6508
6509 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6510 {
6511 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6512 /* eor vectmp0, vectmp0, idxSimdReg */
6513 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6514 /* cnt vectmp0, vectmp0, #0*/
6515 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6516 /* umov tmp0, vectmp0.D[0] */
6517 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6518 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6519 /* cbz tmp0, +1 */
6520 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6521 /* brk #0x1000+enmGstReg */
6522 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6523 }
6524
6525 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6526 {
6527 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6528 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6529 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg);
6530 /* cnt vectmp0 + 1, vectmp0 + 1, #0*/
6531 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6532 /* umov tmp0, (vectmp0 + 1).D[0] */
6533 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6534 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6535 /* cbz tmp0, +1 */
6536 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6537 /* brk #0x1000+enmGstReg */
6538 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6539 }
6540
6541# else
6542# error "Port me!"
6543# endif
6544
6545 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6546 return off;
6547}
6548# endif
6549#endif /* VBOX_STRICT */
6550
6551
6552#ifdef VBOX_STRICT
6553/**
6554 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6555 * important bits.
6556 *
6557 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6558 * Trashes EFLAGS on AMD64.
6559 */
6560static uint32_t
6561iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6562{
6563 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6564 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6565 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6566 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6567
6568#ifdef RT_ARCH_AMD64
6569 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6570
6571 /* je/jz +1 */
6572 pbCodeBuf[off++] = 0x74;
6573 pbCodeBuf[off++] = 0x01;
6574
6575 /* int3 */
6576 pbCodeBuf[off++] = 0xcc;
6577
6578# elif defined(RT_ARCH_ARM64)
6579 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6580
6581 /* b.eq +1 */
6582 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6583 /* brk #0x2000 */
6584 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6585
6586# else
6587# error "Port me!"
6588# endif
6589 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6590
6591 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6592 return off;
6593}
6594#endif /* VBOX_STRICT */
6595
6596
6597/**
6598 * Emits code for checking the return code of a call and rcPassUp, returning
6599 * from the code if either is non-zero.
6600 */
6601DECL_HIDDEN_THROW(uint32_t)
6602iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6603{
6604#ifdef RT_ARCH_AMD64
6605 /*
6606 * AMD64: eax = call status code.
6607 */
6608
6609 /* edx = rcPassUp */
6610 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6611# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6612 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6613# endif
6614
6615 /* edx = eax | rcPassUp */
6616 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6617 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6618 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6619 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6620
6621 /* Jump to non-zero status return path. */
6622 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6623
6624 /* done. */
6625
6626#elif RT_ARCH_ARM64
6627 /*
6628 * ARM64: w0 = call status code.
6629 */
6630# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6631 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6632# endif
6633 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6634
6635 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6636
6637 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6638
6639 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6640 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6641 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6642
6643#else
6644# error "port me"
6645#endif
6646 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6647 RT_NOREF_PV(idxInstr);
6648 return off;
6649}
6650
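/*
 * The code emitted above is roughly equivalent to the following C, where rcCall
 * stands for the status code returned by the call in eax/w0 and the branch
 * target is the shared NonZeroRetOrPassUp tail:
 *
 *      if ((rcCall | pVCpu->iem.s.rcPassUp) != 0)
 *          goto NonZeroRetOrPassUp;
 */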
6651
6652/**
6653 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6654 * raising a \#GP(0) if it isn't.
6655 *
6656 * @returns New code buffer offset, UINT32_MAX on failure.
6657 * @param pReNative The native recompile state.
6658 * @param off The code buffer offset.
6659 * @param idxAddrReg The host register with the address to check.
6660 * @param idxInstr The current instruction.
6661 */
6662DECL_HIDDEN_THROW(uint32_t)
6663iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6664{
6665 /*
6666 * Make sure we don't have any outstanding guest register writes as we may
6667 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6668 */
6669 off = iemNativeRegFlushPendingWrites(pReNative, off);
6670
6671#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6672 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6673#else
6674 RT_NOREF(idxInstr);
6675#endif
6676
6677#ifdef RT_ARCH_AMD64
6678 /*
6679 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6680 * return raisexcpt();
6681 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6682 */
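    /*
     * Worked example of the check above (assuming 48 significant address bits):
     *      canonical     0x00007fff00001000 -> high dword 0x00007fff, +0x8000 = 0x0000ffff, >>16 = 0 -> ok
     *      canonical     0xffff800000001000 -> high dword 0xffff8000, +0x8000 wraps to 0x00000000, >>16 = 0 -> ok
     *      non-canonical 0x0000800000001000 -> high dword 0x00008000, +0x8000 = 0x00010000, >>16 = 1 -> #GP(0)
     */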
6683 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6684
6685 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6686 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6687 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6688 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6689 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6690
6691 iemNativeRegFreeTmp(pReNative, iTmpReg);
6692
6693#elif defined(RT_ARCH_ARM64)
6694 /*
6695 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6696 * return raisexcpt();
6697 * ----
6698 * mov x1, 0x800000000000
6699 * add x1, x0, x1
6700 * cmp xzr, x1, lsr 48
6701 * b.ne .Lraisexcpt
6702 */
6703 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6704
6705 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6706 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6707 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6708 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6709
6710 iemNativeRegFreeTmp(pReNative, iTmpReg);
6711
6712#else
6713# error "Port me"
6714#endif
6715 return off;
6716}
6717
6718
6719/**
6720 * Emits code to check that the content of @a idxAddrReg is within the limit
6721 * of CS, raising a \#GP(0) if it isn't.
6722 *
6723 * @returns New code buffer offset; throws VBox status code on error.
6724 * @param pReNative The native recompile state.
6725 * @param off The code buffer offset.
6726 * @param idxAddrReg The host register (32-bit) with the address to
6727 * check.
6728 * @param idxInstr The current instruction.
6729 */
6730DECL_HIDDEN_THROW(uint32_t)
6731iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6732 uint8_t idxAddrReg, uint8_t idxInstr)
6733{
6734 /*
6735 * Make sure we don't have any outstanding guest register writes as we may
6736 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6737 */
6738 off = iemNativeRegFlushPendingWrites(pReNative, off);
6739
6740#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6741 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6742#else
6743 RT_NOREF(idxInstr);
6744#endif
6745
6746 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6747 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6748 kIemNativeGstRegUse_ReadOnly);
6749
6750 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6751 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6752
6753 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6754 return off;
6755}
6756
6757
6758/**
6759 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
6760 *
6761 * @returns The flush mask.
6762 * @param fCImpl The IEM_CIMPL_F_XXX flags.
6763 * @param fGstShwFlush The starting flush mask.
6764 */
6765DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
6766{
6767 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
6768 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
6769 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
6770 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
6771 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
6772 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
6773 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
6774 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
6775 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
6776 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
6777 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
6778 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
6779 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
6780 return fGstShwFlush;
6781}
6782
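/*
 * Example: a far branch that also modifies EFLAGS
 * (IEM_CIMPL_F_BRANCH_FAR | IEM_CIMPL_F_RFLAGS) yields a mask covering the CS
 * selector, base and limit shadows plus EFLAGS, on top of whatever the caller
 * passed in as the starting mask:
 *
 *      uint64_t const fFlush = iemNativeCImplFlagsToGuestShadowFlushMask(IEM_CIMPL_F_BRANCH_FAR | IEM_CIMPL_F_RFLAGS, 0);
 */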
6783
6784/**
6785 * Emits a call to a CImpl function or something similar.
6786 */
6787DECL_HIDDEN_THROW(uint32_t)
6788iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6789 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6790{
6791 /* Writeback everything. */
6792 off = iemNativeRegFlushPendingWrites(pReNative, off);
6793
6794 /*
6795 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6796 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6797 */
6798 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6799 fGstShwFlush
6800 | RT_BIT_64(kIemNativeGstReg_Pc)
6801 | RT_BIT_64(kIemNativeGstReg_EFlags));
6802 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6803
6804 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6805
6806 /*
6807 * Load the parameters.
6808 */
6809#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6810 /* Special handling of the hidden VBOXSTRICTRC pointer. */
6811 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6812 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6813 if (cAddParams > 0)
6814 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6815 if (cAddParams > 1)
6816 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6817 if (cAddParams > 2)
6818 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6819 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6820
6821#else
6822 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6823 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6824 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6825 if (cAddParams > 0)
6826 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6827 if (cAddParams > 1)
6828 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6829 if (cAddParams > 2)
6830# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6831 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6832# else
6833 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6834# endif
6835#endif
6836
6837 /*
6838 * Make the call.
6839 */
6840 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6841
6842#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6843 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6844#endif
6845
6846 /*
6847 * Check the status code.
6848 */
6849 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6850}
6851
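/*
 * Usage sketch (illustrative; iemCImpl_SomeWorker and uParamValue are made up
 * for the example): defer an instruction to a C implementation that takes one
 * additional parameter beyond pVCpu and cbInstr.
 *
 *      off = iemNativeEmitCImplCall(pReNative, off, idxInstr, 0 /-fGstShwFlush-/,
 *                                   (uintptr_t)iemCImpl_SomeWorker /- hypothetical -/, cbInstr,
 *                                   1 /-cAddParams-/, uParamValue, 0, 0);
 */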
6852
6853/**
6854 * Emits a call to a threaded worker function.
6855 */
6856DECL_HIDDEN_THROW(uint32_t)
6857iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6858{
6859 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6860 off = iemNativeRegFlushPendingWrites(pReNative, off);
6861
6862 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6863 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6864
6865#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6866 /* The threaded function may throw / long jmp, so set current instruction
6867 number if we're counting. */
6868 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6869#endif
6870
6871 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6872
6873#ifdef RT_ARCH_AMD64
6874 /* Load the parameters and emit the call. */
6875# ifdef RT_OS_WINDOWS
6876# ifndef VBOXSTRICTRC_STRICT_ENABLED
6877 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6878 if (cParams > 0)
6879 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6880 if (cParams > 1)
6881 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6882 if (cParams > 2)
6883 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6884# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6885 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6886 if (cParams > 0)
6887 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6888 if (cParams > 1)
6889 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6890 if (cParams > 2)
6891 {
6892 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6893 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6894 }
6895 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6896# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6897# else
6898 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6899 if (cParams > 0)
6900 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6901 if (cParams > 1)
6902 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6903 if (cParams > 2)
6904 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6905# endif
6906
6907 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6908
6909# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6910 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6911# endif
6912
6913#elif RT_ARCH_ARM64
6914 /*
6915 * ARM64:
6916 */
6917 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6918 if (cParams > 0)
6919 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6920 if (cParams > 1)
6921 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6922 if (cParams > 2)
6923 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6924
6925 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6926
6927#else
6928# error "port me"
6929#endif
6930
6931 /*
6932 * Check the status code.
6933 */
6934 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6935
6936 return off;
6937}
6938
6939#ifdef VBOX_WITH_STATISTICS
6940/**
6941 * Emits code to update the thread call statistics.
6942 */
6943DECL_INLINE_THROW(uint32_t)
6944iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6945{
6946 /*
6947 * Update threaded function stats.
6948 */
6949 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6950 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6951# if defined(RT_ARCH_ARM64)
6952 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6953 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6954 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6955 iemNativeRegFreeTmp(pReNative, idxTmp1);
6956 iemNativeRegFreeTmp(pReNative, idxTmp2);
6957# else
6958 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6959# endif
6960 return off;
6961}
6962#endif /* VBOX_WITH_STATISTICS */
6963
6964
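/*
 * Note: The tail emitters below all follow the same pattern: code is only
 * generated if the corresponding label was requested somewhere in the TB
 * (iemNativeLabelFind() != UINT32_MAX), the associated helper is called
 * (typically with pVCpu as the only argument) or a status code is loaded
 * directly, and the code then rejoins the common return sequence.
 */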
6965/**
6966 * Emits the code at the CheckBranchMiss label.
6967 */
6968static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6969{
6970 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
6971 if (idxLabel != UINT32_MAX)
6972 {
6973 iemNativeLabelDefine(pReNative, idxLabel, off);
6974
6975 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
6976 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6977 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
6978
6979 /* jump back to the return sequence. */
6980 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6981 }
6982 return off;
6983}
6984
6985
6986/**
6987 * Emits the code at the NeedCsLimChecking label.
6988 */
6989static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6990{
6991 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
6992 if (idxLabel != UINT32_MAX)
6993 {
6994 iemNativeLabelDefine(pReNative, idxLabel, off);
6995
6996 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
6997 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6998 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
6999
7000 /* jump back to the return sequence. */
7001 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7002 }
7003 return off;
7004}
7005
7006
7007/**
7008 * Emits the code at the ObsoleteTb label.
7009 */
7010static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7011{
7012 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
7013 if (idxLabel != UINT32_MAX)
7014 {
7015 iemNativeLabelDefine(pReNative, idxLabel, off);
7016
7017 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
7018 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7019 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
7020
7021 /* jump back to the return sequence. */
7022 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7023 }
7024 return off;
7025}
7026
7027
7028/**
7029 * Emits the code at the RaiseGP0 label.
7030 */
7031static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7032{
7033 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
7034 if (idxLabel != UINT32_MAX)
7035 {
7036 iemNativeLabelDefine(pReNative, idxLabel, off);
7037
7038 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
7039 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7040 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
7041
7042 /* jump back to the return sequence. */
7043 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7044 }
7045 return off;
7046}
7047
7048
7049/**
7050 * Emits the code at the RaiseNm label.
7051 */
7052static uint32_t iemNativeEmitRaiseNm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7053{
7054 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseNm);
7055 if (idxLabel != UINT32_MAX)
7056 {
7057 iemNativeLabelDefine(pReNative, idxLabel, off);
7058
7059 /* iemNativeHlpExecRaiseNm(PVMCPUCC pVCpu) */
7060 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7061 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseNm);
7062
7063 /* jump back to the return sequence. */
7064 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7065 }
7066 return off;
7067}
7068
7069
7070/**
7071 * Emits the code at the RaiseUd label.
7072 */
7073static uint32_t iemNativeEmitRaiseUd(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7074{
7075 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseUd);
7076 if (idxLabel != UINT32_MAX)
7077 {
7078 iemNativeLabelDefine(pReNative, idxLabel, off);
7079
7080 /* iemNativeHlpExecRaiseUd(PVMCPUCC pVCpu) */
7081 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7082 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseUd);
7083
7084 /* jump back to the return sequence. */
7085 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7086 }
7087 return off;
7088}
7089
7090
7091/**
7092 * Emits the code at the RaiseMf label.
7093 */
7094static uint32_t iemNativeEmitRaiseMf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7095{
7096 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseMf);
7097 if (idxLabel != UINT32_MAX)
7098 {
7099 iemNativeLabelDefine(pReNative, idxLabel, off);
7100
7101 /* iemNativeHlpExecRaiseMf(PVMCPUCC pVCpu) */
7102 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7103 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseMf);
7104
7105 /* jump back to the return sequence. */
7106 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7107 }
7108 return off;
7109}
7110
7111
7112/**
7113 * Emits the code at the RaiseXf label.
7114 */
7115static uint32_t iemNativeEmitRaiseXf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7116{
7117 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseXf);
7118 if (idxLabel != UINT32_MAX)
7119 {
7120 iemNativeLabelDefine(pReNative, idxLabel, off);
7121
7122 /* iemNativeHlpExecRaiseXf(PVMCPUCC pVCpu) */
7123 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7124 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseXf);
7125
7126 /* jump back to the return sequence. */
7127 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7128 }
7129 return off;
7130}
7131
7132
7133/**
7134 * Emits the code at the ReturnWithFlags label (returns
7135 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
7136 */
7137static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7138{
7139 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
7140 if (idxLabel != UINT32_MAX)
7141 {
7142 iemNativeLabelDefine(pReNative, idxLabel, off);
7143
7144 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
7145
7146 /* jump back to the return sequence. */
7147 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7148 }
7149 return off;
7150}
7151
7152
7153/**
7154 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
7155 */
7156static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7157{
7158 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
7159 if (idxLabel != UINT32_MAX)
7160 {
7161 iemNativeLabelDefine(pReNative, idxLabel, off);
7162
7163 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
7164
7165 /* jump back to the return sequence. */
7166 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7167 }
7168 return off;
7169}
7170
7171
7172/**
7173 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
7174 */
7175static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7176{
7177 /*
7178 * Generate the rc + rcPassUp fiddling code if needed.
7179 */
7180 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
7181 if (idxLabel != UINT32_MAX)
7182 {
7183 iemNativeLabelDefine(pReNative, idxLabel, off);
7184
7185 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
7186#ifdef RT_ARCH_AMD64
7187# ifdef RT_OS_WINDOWS
7188# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7189 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
7190# endif
7191 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
7192 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
7193# else
7194 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7195 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
7196# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7197 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
7198# endif
7199# endif
7200# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7201 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
7202# endif
7203
7204#else
7205 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
7206 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7207 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
7208#endif
7209
7210 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
7211 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7212 }
7213 return off;
7214}
7215
7216
7217/**
7218 * Emits a standard epilog.
7219 */
7220static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
7221{
7222 *pidxReturnLabel = UINT32_MAX;
7223
7224 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
7225 off = iemNativeRegFlushPendingWrites(pReNative, off);
7226
7227 /*
7228 * Successful return, so clear the return register (eax, w0).
7229 */
7230 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
7231
7232 /*
7233 * Define label for common return point.
7234 */
7235 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
7236 *pidxReturnLabel = idxReturn;
7237
7238 /*
7239 * Restore registers and return.
7240 */
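    /* In short: on AMD64 we reposition rsp at the last push, pop r15-r12 (plus
       rdi and rsi on Windows) and rbx, then do leave + ret, with an int3 as
       poison; on ARM64 we reload the saved register pairs, restore SP and
       return (retab on darwin). */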
7241#ifdef RT_ARCH_AMD64
7242 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7243
7244 /* Reposition rsp at the r15 restore point. */
7245 pbCodeBuf[off++] = X86_OP_REX_W;
7246 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7247 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7248 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7249
7250 /* Pop non-volatile registers and return */
7251 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7252 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7253 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7254 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7255 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7256 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7257 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7258 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7259# ifdef RT_OS_WINDOWS
7260 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7261 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7262# endif
7263 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7264 pbCodeBuf[off++] = 0xc9; /* leave */
7265 pbCodeBuf[off++] = 0xc3; /* ret */
7266 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7267
7268#elif RT_ARCH_ARM64
7269 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7270
7271 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
7272 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7273 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7274 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7275 IEMNATIVE_FRAME_VAR_SIZE / 8);
7276 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7277 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7278 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7279 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7280 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7281 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7282 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7283 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7284 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7285 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7286 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7287 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7288
7289 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7290 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7291 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7292 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7293
7294 /* retab / ret */
7295# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7296 if (1)
7297 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7298 else
7299# endif
7300 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7301
7302#else
7303# error "port me"
7304#endif
7305 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7306
7307 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
7308}
7309
7310
7311/**
7312 * Emits a standard prolog.
7313 */
7314static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7315{
7316#ifdef RT_ARCH_AMD64
7317 /*
7318 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
7319 * reserving 64 bytes for stack variables plus 4 non-register argument
7320 * slots. Fixed register assignment: xBX = pVCpu.
7321 *
7322 * Since we always do the same register spilling, we can use the same
7323 * unwind description for all the code.
7324 */
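    /* In short: push rbp; mov rbp, rsp; push rbx; rbx = pVCpu; push rsi and
       rdi (Windows only); push r12-r15; optionally save the frame pointer for
       the longjmp fallback; then reserve the aligned variable and argument
       area with a single sub rsp. */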
7325 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7326 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
7327 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
7328 pbCodeBuf[off++] = 0x8b;
7329 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
7330 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
7331 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
7332# ifdef RT_OS_WINDOWS
7333 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
7334 pbCodeBuf[off++] = 0x8b;
7335 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
7336 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
7337 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
7338# else
7339 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
7340 pbCodeBuf[off++] = 0x8b;
7341 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
7342# endif
7343 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
7344 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
7345 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
7346 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
7347 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
7348 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
7349 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
7350 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
7351
7352# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7353 /* Save the frame pointer. */
7354 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
7355# endif
7356
7357 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
7358 X86_GREG_xSP,
7359 IEMNATIVE_FRAME_ALIGN_SIZE
7360 + IEMNATIVE_FRAME_VAR_SIZE
7361 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
7362 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
7363 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
7364 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
7365 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
7366
7367#elif RT_ARCH_ARM64
7368 /*
7369 * We set up a stack frame exactly like on x86, only we have to push the
7370 * return address our selves here. We save all non-volatile registers.
7371 */
7372 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
7373
7374# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we have been
7375 * unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
7376 * definitely the dwarf stepping code, but until that is pinned down it's very tedious to figure out whether it's
7377 * in any way conditional, so just emit this instruction now and hope for the best... */
7378 /* pacibsp */
7379 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
7380# endif
7381
7382 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
7383 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7384 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7385 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7386 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7387 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7388 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7389 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7390 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7391 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7392 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7393 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7394 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7395 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7396 /* Save the BP and LR (ret address) registers at the top of the frame. */
7397 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7398 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7399 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7400 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7401 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7402 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7403
7404 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7405 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7406
7407 /* mov x28, x0 */
7408 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7409 /* mov x27, x1 */
7410 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7411
7412# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7413 /* Save the frame pointer. */
7414 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7415 ARMV8_A64_REG_X2);
7416# endif
7417
7418#else
7419# error "port me"
7420#endif
7421 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7422 return off;
7423}
7424
7425
7426
7427
7428/*********************************************************************************************************************************
7429* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
7430*********************************************************************************************************************************/
7431
7432#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
7433 { \
7434 Assert(pReNative->Core.bmVars == 0); \
7435 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
7436 Assert(pReNative->Core.bmStack == 0); \
7437 pReNative->fMc = (a_fMcFlags); \
7438 pReNative->fCImpl = (a_fCImplFlags); \
7439 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
7440
7441/** We have to get to the end in recompilation mode, as otherwise we won't
7442 * generate code for all the IEM_MC_IF_XXX branches. */
7443#define IEM_MC_END() \
7444 iemNativeVarFreeAll(pReNative); \
7445 } return off
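/* Illustrative (hypothetical) shape of a recompiled MC block body using the
   two wrappers above - the real bodies are generated from the IEM_MC blocks
   in the instruction implementations and thread 'off' through implicitly:

        IEM_MC_BEGIN(0, 0, IEM_MC_F_MIN_386, 0);
        ... recompiled IEM_MC_XXX statements emitting code ...
        IEM_MC_END();
*/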
7446
7447
7448
7449/*********************************************************************************************************************************
7450* Native Emitter Support. *
7451*********************************************************************************************************************************/
7452
7453
7454#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
7455
7456#define IEM_MC_NATIVE_ELSE() } else {
7457
7458#define IEM_MC_NATIVE_ENDIF() } ((void)0)
7459
7460
7461#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
7462 off = a_fnEmitter(pReNative, off)
7463
7464#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
7465 off = a_fnEmitter(pReNative, off, (a0))
7466
7467#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
7468 off = a_fnEmitter(pReNative, off, (a0), (a1))
7469
7470#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
7471 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
7472
7473#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
7474 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
7475
7476#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
7477 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
7478
7479#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
7480 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
7481
7482#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
7483 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
7484
7485#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
7486 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
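/* Illustrative (hypothetical) use of the wrappers above from an instruction
   body; the emitter name, variable indices and arch mask are placeholders:

        IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64) {
            IEM_MC_NATIVE_EMIT_2(iemNativeEmit_example_r_r, idxRegDst, idxRegSrc);
        } IEM_MC_NATIVE_ELSE() {
            ... fallback, e.g. an IEM_MC_CALL_XXX_AIMPL_N invocation ...
        } IEM_MC_NATIVE_ENDIF();
*/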
7487
7488
7489
7490/*********************************************************************************************************************************
7491* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
7492*********************************************************************************************************************************/
7493
7494#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
7495 pReNative->fMc = 0; \
7496 pReNative->fCImpl = (a_fFlags); \
7497 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
7498
7499
7500#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
7501 pReNative->fMc = 0; \
7502 pReNative->fCImpl = (a_fFlags); \
7503 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
7504
7505DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7506 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7507 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
7508{
7509 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
7510}
7511
7512
7513#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
7514 pReNative->fMc = 0; \
7515 pReNative->fCImpl = (a_fFlags); \
7516 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
7517 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
7518
7519DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7520 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7521 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
7522{
7523 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
7524}
7525
7526
7527#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
7528 pReNative->fMc = 0; \
7529 pReNative->fCImpl = (a_fFlags); \
7530 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
7531 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
7532
7533DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7534 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7535 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
7536 uint64_t uArg2)
7537{
7538 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
7539}
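
/* Note: The one, two and three argument variants above all funnel into
   iemNativeEmitCImplCall(), passing the actual argument count and zero for
   the unused uArgN slots. */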
7540
7541
7542
7543/*********************************************************************************************************************************
7544* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
7545*********************************************************************************************************************************/
7546
7547/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
7548 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
7549DECL_INLINE_THROW(uint32_t)
7550iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7551{
7552 /*
7553 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
7554 * return with a special status code and make the execution loop deal with
7555 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
7556 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
7557 * could continue w/o interruption, it will probably drop into the
7558 * debugger, so it is not worth the effort of trying to service it here;
7559 * we just lump it in with the handling of the others.
7560 *
7561 * To simplify the code and the register state management even more (wrt
7562 * the immediate in the AND operation), we always update the flags and skip
7563 * the conditional jump associated with the extra check.
7564 */
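    /* In pseudo code, the sequence emitted below is roughly:
           if (eflags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
               goto ReturnWithFlags;
           eflags &= ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
           cpum.GstCtx.eflags = eflags; */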
7565 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
7566 <= UINT32_MAX);
7567#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
7568 AssertMsg( pReNative->idxCurCall == 0
7569 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
7570 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
7571#endif
7572
7573 /*
7574 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
7575 * any pending register writes must be flushed.
7576 */
7577 off = iemNativeRegFlushPendingWrites(pReNative, off);
7578
7579 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7580 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
7581 true /*fSkipLivenessAssert*/);
7582 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
7583 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
7584 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
7585 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
7586 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
7587
7588 /* Free but don't flush the EFLAGS register. */
7589 iemNativeRegFreeTmp(pReNative, idxEflReg);
7590
7591 return off;
7592}
7593
7594
7595/** Finishes the instruction with status @a a_rcNormal: emits nothing for VINF_SUCCESS, and a ReturnBreak exit for VINF_IEM_REEXEC_BREAK. */
7596template<int const a_rcNormal>
7597DECL_FORCE_INLINE(uint32_t)
7598iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7599{
7600 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
7601 if (a_rcNormal != VINF_SUCCESS)
7602 {
7603#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7604 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7605#else
7606 RT_NOREF_PV(idxInstr);
7607#endif
7608
7609 /* As this code returns from the TB any pending register writes must be flushed. */
7610 off = iemNativeRegFlushPendingWrites(pReNative, off);
7611
7612 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
7613 }
7614 return off;
7615}
7616
7617
7618#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
7619 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7620 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7621
7622#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7623 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7624 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7625 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7626
7627/** Same as iemRegAddToRip64AndFinishingNoFlags. */
7628DECL_INLINE_THROW(uint32_t)
7629iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7630{
7631#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7632# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7633 if (!pReNative->Core.offPc)
7634 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7635# endif
7636
7637 /* Allocate a temporary PC register. */
7638 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7639
7640 /* Perform the addition and store the result. */
7641 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
7642 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7643
7644 /* Free but don't flush the PC register. */
7645 iemNativeRegFreeTmp(pReNative, idxPcReg);
7646#endif
7647
7648#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7649 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7650
7651 pReNative->Core.offPc += cbInstr;
7652# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7653 off = iemNativePcAdjustCheck(pReNative, off);
7654# endif
7655 if (pReNative->cCondDepth)
7656 off = iemNativeEmitPcWriteback(pReNative, off);
7657 else
7658 pReNative->Core.cInstrPcUpdateSkipped++;
7659#endif
7660
7661 return off;
7662}
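
/* Note: With IEMNATIVE_WITH_DELAYED_PC_UPDATING the advance above is merely
   accumulated in Core.offPc; it is only written back to CPUMCTX when needed
   (e.g. inside a conditional, see iemNativeEmitPcWriteback), otherwise the
   skipped update is just counted.  The same applies to the 32-bit and 16-bit
   variants below. */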
7663
7664
7665#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
7666 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7667 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7668
7669#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7670 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7671 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7672 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7673
7674/** Same as iemRegAddToEip32AndFinishingNoFlags. */
7675DECL_INLINE_THROW(uint32_t)
7676iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7677{
7678#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7679# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7680 if (!pReNative->Core.offPc)
7681 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7682# endif
7683
7684 /* Allocate a temporary PC register. */
7685 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7686
7687 /* Perform the addition and store the result. */
7688 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
7689 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7690
7691 /* Free but don't flush the PC register. */
7692 iemNativeRegFreeTmp(pReNative, idxPcReg);
7693#endif
7694
7695#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7696 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7697
7698 pReNative->Core.offPc += cbInstr;
7699# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7700 off = iemNativePcAdjustCheck(pReNative, off);
7701# endif
7702 if (pReNative->cCondDepth)
7703 off = iemNativeEmitPcWriteback(pReNative, off);
7704 else
7705 pReNative->Core.cInstrPcUpdateSkipped++;
7706#endif
7707
7708 return off;
7709}
7710
7711
7712#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
7713 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7714 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7715
7716#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7717 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7718 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7719 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7720
7721/** Same as iemRegAddToIp16AndFinishingNoFlags. */
7722DECL_INLINE_THROW(uint32_t)
7723iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7724{
7725#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7726# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7727 if (!pReNative->Core.offPc)
7728 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7729# endif
7730
7731 /* Allocate a temporary PC register. */
7732 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7733
7734 /* Perform the addition and store the result. */
7735 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
7736 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7737 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7738
7739 /* Free but don't flush the PC register. */
7740 iemNativeRegFreeTmp(pReNative, idxPcReg);
7741#endif
7742
7743#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7744 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7745
7746 pReNative->Core.offPc += cbInstr;
7747# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7748 off = iemNativePcAdjustCheck(pReNative, off);
7749# endif
7750 if (pReNative->cCondDepth)
7751 off = iemNativeEmitPcWriteback(pReNative, off);
7752 else
7753 pReNative->Core.cInstrPcUpdateSkipped++;
7754#endif
7755
7756 return off;
7757}
7758
7759
7760
7761/*********************************************************************************************************************************
7762* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
7763*********************************************************************************************************************************/
7764
7765#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7766 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7767 (a_enmEffOpSize), pCallEntry->idxInstr); \
7768 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7769
7770#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7771 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7772 (a_enmEffOpSize), pCallEntry->idxInstr); \
7773 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7774 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7775
7776#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
7777 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7778 IEMMODE_16BIT, pCallEntry->idxInstr); \
7779 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7780
7781#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7782 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7783 IEMMODE_16BIT, pCallEntry->idxInstr); \
7784 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7785 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7786
7787#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
7788 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7789 IEMMODE_64BIT, pCallEntry->idxInstr); \
7790 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7791
7792#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7793 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7794 IEMMODE_64BIT, pCallEntry->idxInstr); \
7795 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7796 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7797
7798/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
7799 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
7800 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
7801DECL_INLINE_THROW(uint32_t)
7802iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
7803 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
7804{
7805 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
7806
7807 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7808 off = iemNativeRegFlushPendingWrites(pReNative, off);
7809
7810#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7811 Assert(pReNative->Core.offPc == 0);
7812
7813 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7814#endif
7815
7816 /* Allocate a temporary PC register. */
7817 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7818
7819 /* Perform the addition. */
7820 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
7821
7822 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
7823 {
7824 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
7825 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7826 }
7827 else
7828 {
7829 /* Just truncate the result to 16-bit IP. */
7830 Assert(enmEffOpSize == IEMMODE_16BIT);
7831 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7832 }
7833 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7834
7835 /* Free but don't flush the PC register. */
7836 iemNativeRegFreeTmp(pReNative, idxPcReg);
7837
7838 return off;
7839}
7840
7841
7842#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7843 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7844 (a_enmEffOpSize), pCallEntry->idxInstr); \
7845 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7846
7847#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7848 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7849 (a_enmEffOpSize), pCallEntry->idxInstr); \
7850 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7851 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7852
7853#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
7854 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7855 IEMMODE_16BIT, pCallEntry->idxInstr); \
7856 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7857
7858#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7859 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7860 IEMMODE_16BIT, pCallEntry->idxInstr); \
7861 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7862 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7863
7864#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
7865 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7866 IEMMODE_32BIT, pCallEntry->idxInstr); \
7867 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7868
7869#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7870 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7871 IEMMODE_32BIT, pCallEntry->idxInstr); \
7872 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7873 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7874
7875/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
7876 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
7877 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
7878DECL_INLINE_THROW(uint32_t)
7879iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
7880 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
7881{
7882 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
7883
7884 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7885 off = iemNativeRegFlushPendingWrites(pReNative, off);
7886
7887#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7888 Assert(pReNative->Core.offPc == 0);
7889
7890 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7891#endif
7892
7893 /* Allocate a temporary PC register. */
7894 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7895
7896 /* Perform the addition. */
7897 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
7898
7899 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
7900 if (enmEffOpSize == IEMMODE_16BIT)
7901 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7902
7903 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
7904/** @todo we can skip this in 32-bit FLAT mode. */
7905 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7906
7907 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7908
7909 /* Free but don't flush the PC register. */
7910 iemNativeRegFreeTmp(pReNative, idxPcReg);
7911
7912 return off;
7913}
7914
7915
7916#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
7917 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
7918 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7919
7920#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
7921 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
7922 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7923 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7924
7925#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
7926 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
7927 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7928
7929#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7930 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
7931 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7932 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7933
7934#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
7935 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
7936 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7937
7938#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7939 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
7940 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7941 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7942
7943/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
7944DECL_INLINE_THROW(uint32_t)
7945iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7946 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
7947{
7948 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7949 off = iemNativeRegFlushPendingWrites(pReNative, off);
7950
7951#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7952 Assert(pReNative->Core.offPc == 0);
7953
7954 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7955#endif
7956
7957 /* Allocate a temporary PC register. */
7958 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7959
7960 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
7961 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
7962 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7963 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7964 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7965
7966 /* Free but don't flush the PC register. */
7967 iemNativeRegFreeTmp(pReNative, idxPcReg);
7968
7969 return off;
7970}
7971
7972
7973
7974/*********************************************************************************************************************************
7975* Emitters for changing PC/RIP/EIP/IP with a indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
7976*********************************************************************************************************************************/
7977
7978/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
7979#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
7980 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7981
7982/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
7983#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
7984 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7985
7986/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
7987#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
7988 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7989
7990/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
7991 * clears flags. */
7992#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
7993 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
7994 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7995
7996/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
7997 * clears flags. */
7998#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
7999 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
8000 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
8001
8002/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
8003 * clears flags. */
8004#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
8005 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
8006 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
8007
8008#undef IEM_MC_SET_RIP_U16_AND_FINISH
8009
8010
8011/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
8012#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
8013 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
8014
8015/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
8016#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
8017 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
8018
8019/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
8020 * clears flags. */
8021#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
8022 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
8023 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
8024
8025/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
8026 * and clears flags. */
8027#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
8028 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
8029 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
8030
8031#undef IEM_MC_SET_RIP_U32_AND_FINISH
8032
8033
8034/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
8035#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
8036 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
8037
8038/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
8039 * and clears flags. */
8040#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
8041 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
8042 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
8043
8044#undef IEM_MC_SET_RIP_U64_AND_FINISH
8045
8046
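/* Note: The a_uXXNewIP arguments above are IEM_MC variable indices rather
   than immediates; the common worker below therefore takes idxVarPc and
   acquires it as the register shadowing the guest PC. */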
8047/** Same as iemRegRipJumpU16AndFinishNoFlags,
8048 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
8049DECL_INLINE_THROW(uint32_t)
8050iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
8051 uint8_t idxInstr, uint8_t cbVar)
8052{
8053 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
8054 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
8055
8056 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
8057 off = iemNativeRegFlushPendingWrites(pReNative, off);
8058
8059#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8060 Assert(pReNative->Core.offPc == 0);
8061
8062 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
8063#endif
8064
8065 /* Get a register with the new PC loaded from idxVarPc.
8066 Note! This ASSUMES that the high bits of the GPR are zeroed. */
8067 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
8068
8069 /* Check limit (may #GP(0) + exit TB). */
8070 if (!f64Bit)
8071/** @todo we can skip this test in FLAT 32-bit mode. */
8072 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
8073 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
8074 else if (cbVar > sizeof(uint32_t))
8075 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
8076
8077 /* Store the result. */
8078 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
8079
8080 iemNativeVarRegisterRelease(pReNative, idxVarPc);
8081 /** @todo implicitly free the variable? */
8082
8083 return off;
8084}
8085
8086
8087
8088/*********************************************************************************************************************************
8089* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
8090*********************************************************************************************************************************/
8091
8092#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
8093 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
8094
8095/**
8096 * Emits code to check if a \#NM exception should be raised.
8097 *
8098 * @returns New code buffer offset, UINT32_MAX on failure.
8099 * @param pReNative The native recompile state.
8100 * @param off The code buffer offset.
8101 * @param idxInstr The current instruction.
8102 */
8103DECL_INLINE_THROW(uint32_t)
8104iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8105{
8106 /*
8107 * Make sure we don't have any outstanding guest register writes as we may
8108 * raise an #NM and all guest register must be up to date in CPUMCTX.
8109 *
8110 * @todo r=aeichner Can we postpone this to the RaiseNm path?
8111 */
8112 off = iemNativeRegFlushPendingWrites(pReNative, off);
8113
8114#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8115 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8116#else
8117 RT_NOREF(idxInstr);
8118#endif
8119
8120 /* Allocate a temporary CR0 register. */
8121 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8122 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8123
8124 /*
8125 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
8126 * return raisexcpt();
8127 */
8128 /* Test and jump. */
8129 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
8130
8131 /* Free but don't flush the CR0 register. */
8132 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8133
8134 return off;
8135}
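
/* Note: The cold paths for the checks in this section are the shared
   RaiseNm/RaiseUd/RaiseMf/RaiseXf tails emitted earlier (see
   iemNativeEmitRaiseNm and friends), so the hot path here boils down to a
   test-and-branch per condition. */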
8136
8137
8138#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
8139 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
8140
8141/**
8142 * Emits code to check if a \#MF exception should be raised.
8143 *
8144 * @returns New code buffer offset, UINT32_MAX on failure.
8145 * @param pReNative The native recompile state.
8146 * @param off The code buffer offset.
8147 * @param idxInstr The current instruction.
8148 */
8149DECL_INLINE_THROW(uint32_t)
8150iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8151{
8152 /*
8153 * Make sure we don't have any outstanding guest register writes as we may
8154 * raise an #MF and all guest registers must be up to date in CPUMCTX.
8155 *
8156 * @todo r=aeichner Can we postpone this to the RaiseMf path?
8157 */
8158 off = iemNativeRegFlushPendingWrites(pReNative, off);
8159
8160#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8161 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8162#else
8163 RT_NOREF(idxInstr);
8164#endif
8165
8166 /* Allocate a temporary FSW register. */
8167 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
8168 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
8169
8170 /*
8171 * if ((FSW & X86_FSW_ES) != 0)
8172 * return raisexcpt();
8173 */
8174 /* Test and jump. */
8175 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFpuFswReg, X86_FSW_ES, idxLabelRaiseMf);
8176
8177 /* Free but don't flush the FSW register. */
8178 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
8179
8180 return off;
8181}
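
/* Illustrative sketch, not emitted or compiled code; the exact FSW field path
   is an assumption for orientation only:
        if (pVCpu->cpum.GstCtx.XState.x87.FSW & X86_FSW_ES)
            return iemRaiseMathFault(pVCpu);
   A set exception-summary (ES) bit means an unmasked x87 exception is pending
   and sends us to the RaiseMf label. */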
8182
8183
8184#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
8185 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
8186
8187/**
8188 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
8189 *
8190 * @returns New code buffer offset, UINT32_MAX on failure.
8191 * @param pReNative The native recompile state.
8192 * @param off The code buffer offset.
8193 * @param idxInstr The current instruction.
8194 */
8195DECL_INLINE_THROW(uint32_t)
8196iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8197{
8198 /*
8199 * Make sure we don't have any outstanding guest register writes as we may
8200 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
8201 *
8202 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
8203 */
8204 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
8205
8206#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8207 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8208#else
8209 RT_NOREF(idxInstr);
8210#endif
8211
8212 /* Allocate a temporary CR0 and CR4 register. */
8213 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8214 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8215 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8216 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8217
8218 /** @todo r=aeichner Optimize this more later to have fewer compares and branches,
8219 * (see IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() in IEMMc.h but check that it has some
8220 * actual performance benefit first). */
8221 /*
8222 * if (cr0 & X86_CR0_EM)
8223 * return raisexcpt();
8224 */
8225 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_EM_BIT, idxLabelRaiseUd);
8226 /*
8227 * if (!(cr4 & X86_CR4_OSFXSR))
8228 * return raisexcpt();
8229 */
8230 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSFXSR_BIT, idxLabelRaiseUd);
8231 /*
8232 * if (cr0 & X86_CR0_TS)
8233 * return raisexcpt();
8234 */
8235 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_TS_BIT, idxLabelRaiseNm);
8236
8237 /* Free but don't flush the CR0 and CR4 register. */
8238 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8239 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8240
8241 return off;
8242}
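
/* Illustrative sketch, not emitted or compiled code: the three branches above
   correspond roughly to, in this order:
        if (cr0 & X86_CR0_EM)        -> #UD
        if (!(cr4 & X86_CR4_OSFXSR)) -> #UD
        if (cr0 & X86_CR0_TS)        -> #NM
   so both #UD conditions are tested before the #NM one. */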
8243
8244
8245#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
8246 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
8247
8248/**
8249 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
8250 *
8251 * @returns New code buffer offset, UINT32_MAX on failure.
8252 * @param pReNative The native recompile state.
8253 * @param off The code buffer offset.
8254 * @param idxInstr The current instruction.
8255 */
8256DECL_INLINE_THROW(uint32_t)
8257iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8258{
8259 /*
8260 * Make sure we don't have any outstanding guest register writes as we may
8261 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
8262 *
8263 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
8264 */
8265 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
8266
8267#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8268 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8269#else
8270 RT_NOREF(idxInstr);
8271#endif
8272
8273 /* Allocate a temporary CR0, CR4 and XCR0 register. */
8274 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8275 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8276 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0, kIemNativeGstRegUse_ReadOnly);
8277 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8278 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8279
8280 /** @todo r=aeichner Optimize this more later to have fewer compares and branches,
8281 * (see IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() in IEMMc.h but check that it has some
8282 * actual performance benefit first). */
8283 /*
8284 * if ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE))
8285 * return raisexcpt();
8286 */
8287 const uint8_t idxRegTmp = iemNativeRegAllocTmpImm(pReNative, &off, XSAVE_C_YMM | XSAVE_C_SSE);
8288 off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegTmp, idxXcr0Reg);
8289 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, idxRegTmp, XSAVE_C_YMM | XSAVE_C_SSE, idxLabelRaiseUd);
8290 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8291
8292 /*
8293 * if (!(cr4 & X86_CR4_OSXSAVE))
8294 * return raisexcpt();
8295 */
8296 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT, idxLabelRaiseUd);
8297 /*
8298 * if (cr0 & X86_CR0_TS)
8299 * return raisexcpt();
8300 */
8301 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_TS_BIT, idxLabelRaiseNm);
8302
8303 /* Free but don't flush the CR0, CR4 and XCR0 register. */
8304 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8305 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8306 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
8307
8308 return off;
8309}
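
/* Worked example for the XCR0 check above (descriptive only): XSAVE_C_SSE is
   bit 1 and XSAVE_C_YMM is bit 2, so the immediate loaded into idxRegTmp is
   0x6.  With xcr0 = 0x3 (X87+SSE only) the AND yields 0x2 != 0x6 and we jump
   to RaiseUd; with xcr0 = 0x7 it yields 0x6 and we fall through to the
   CR4.OSXSAVE and CR0.TS tests. */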
8310
8311
8312#define IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
8313 off = iemNativeEmitRaiseSseAvxSimdFpXcpt(pReNative, off, pCallEntry->idxInstr)
8314
8315/**
8316 * Emits code to raise a SIMD floating point exception (either \#UD or \#XF).
8317 *
8318 * @returns New code buffer offset, UINT32_MAX on failure.
8319 * @param pReNative The native recompile state.
8320 * @param off The code buffer offset.
8321 * @param idxInstr The current instruction.
8322 */
8323DECL_INLINE_THROW(uint32_t)
8324iemNativeEmitRaiseSseAvxSimdFpXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8325{
8326 /*
8327 * Make sure we don't have any outstanding guest register writes as we may
8328 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
8329 *
8330 * @todo r=aeichner Can we postpone this to the RaiseXf/RaiseUd path?
8331 */
8332 off = iemNativeRegFlushPendingWrites(pReNative, off);
8333
8334#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8335 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8336#else
8337 RT_NOREF(idxInstr);
8338#endif
8339
8340 /* Allocate a temporary CR4 register. */
8341 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8342 uint8_t const idxLabelRaiseXf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseXf);
8343 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8344
8345 /*
8346 * if (!(cr4 & X86_CR4_OSXMMEEXCPT))
8347 * return raiseUd();
8348 */
8349 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXMMEEXCPT_BIT, idxLabelRaiseUd);
8350
8351 /* Raise the \#XF exception unconditionally (OSXMMEXCPT is set). */
8352 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseXf);
8353
8354 /* Free but don't flush the CR4 register. */
8355 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8356
8357 return off;
8358}
8359
8360
8361
8362/*********************************************************************************************************************************
8363* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
8364*********************************************************************************************************************************/
8365
8366/**
8367 * Pushes an IEM_MC_IF_XXX onto the condition stack.
8368 *
8369 * @returns Pointer to the condition stack entry on success; throws
8370 * VERR_IEM_COND_TOO_DEEPLY_NESTED when nested too deeply.
8371 */
8372DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
8373{
8374#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8375 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
8376#endif
8377
8378 uint32_t const idxStack = pReNative->cCondDepth;
8379 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
8380
8381 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
8382 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
8383
8384 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
8385 pEntry->fInElse = false;
8386 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
8387 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
8388
8389 return pEntry;
8390}
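
/* Illustrative sketch of how the condition stack is used (hypothetical MC
   block fragment, for orientation only): each IEM_MC_IF_XXX below calls
   iemNativeCondPushIf() and opens a do-scope, IEM_MC_ELSE() switches the top
   entry to its else-part, and IEM_MC_ENDIF() pops it again:
        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
            ...
        } IEM_MC_ELSE() {
            ...
        } IEM_MC_ENDIF();
   Nesting deeper than RT_ELEMENTS(pReNative->aCondStack) levels throws
   VERR_IEM_COND_TOO_DEEPLY_NESTED. */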
8391
8392
8393/**
8394 * Start of the if-block, snapshotting the register and variable state.
8395 */
8396DECL_INLINE_THROW(void)
8397iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
8398{
8399 Assert(offIfBlock != UINT32_MAX);
8400 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8401 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8402 Assert(!pEntry->fInElse);
8403
8404 /* Define the start of the IF block if requested or for disassembly purposes. */
8405 if (idxLabelIf != UINT32_MAX)
8406 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
8407#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8408 else
8409 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
8410#else
8411 RT_NOREF(offIfBlock);
8412#endif
8413
8414#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8415 Assert(pReNative->Core.offPc == 0);
8416#endif
8417
8418 /* Copy the initial state so we can restore it in the 'else' block. */
8419 pEntry->InitialState = pReNative->Core;
8420}
8421
8422
8423#define IEM_MC_ELSE() } while (0); \
8424 off = iemNativeEmitElse(pReNative, off); \
8425 do {
8426
8427/** Emits code related to IEM_MC_ELSE. */
8428DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8429{
8430 /* Check sanity and get the conditional stack entry. */
8431 Assert(off != UINT32_MAX);
8432 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8433 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8434 Assert(!pEntry->fInElse);
8435
8436 /* Jump to the endif */
8437 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
8438
8439 /* Define the else label and enter the else part of the condition. */
8440 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
8441 pEntry->fInElse = true;
8442
8443#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8444 Assert(pReNative->Core.offPc == 0);
8445#endif
8446
8447 /* Snapshot the core state so we can do a merge at the endif and restore
8448 the snapshot we took at the start of the if-block. */
8449 pEntry->IfFinalState = pReNative->Core;
8450 pReNative->Core = pEntry->InitialState;
8451
8452 return off;
8453}
8454
8455
8456#define IEM_MC_ENDIF() } while (0); \
8457 off = iemNativeEmitEndIf(pReNative, off)
8458
8459/** Emits code related to IEM_MC_ENDIF. */
8460DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8461{
8462 /* Check sanity and get the conditional stack entry. */
8463 Assert(off != UINT32_MAX);
8464 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8465 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8466
8467#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8468 Assert(pReNative->Core.offPc == 0);
8469#endif
8470
8471 /*
8472 * Now we have to find common ground with the core state at the end of the
8473 * if-block (or else-block). Use the smallest common denominator and just drop anything
8474 * that isn't the same in both states.
8475 */
8476 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
8477 * which is why we're doing this at the end of the else-block.
8478 * But we'd need more info about the future for that to be worth the effort. */
8479 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
8480 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
8481 {
8482 /* shadow guest stuff first. */
8483 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
8484 if (fGstRegs)
8485 {
8486 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
8487 do
8488 {
8489 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
8490 fGstRegs &= ~RT_BIT_64(idxGstReg);
8491
8492 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
8493 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
8494 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
8495 {
8496 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
8497 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
8498 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
8499 }
8500 } while (fGstRegs);
8501 }
8502 else
8503 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
8504
8505 /* Check variables next. For now we must require them to be identical
8506 or stuff we can recreate. */
8507 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
8508 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
8509 if (fVars)
8510 {
8511 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
8512 do
8513 {
8514 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
8515 fVars &= ~RT_BIT_32(idxVar);
8516
8517 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
8518 {
8519 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
8520 continue;
8521 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
8522 {
8523 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8524 if (idxHstReg != UINT8_MAX)
8525 {
8526 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8527 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8528 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
8529 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8530 }
8531 continue;
8532 }
8533 }
8534 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
8535 continue;
8536
8537 /* Irreconcilable, so drop it. */
8538 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8539 if (idxHstReg != UINT8_MAX)
8540 {
8541 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8542 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8543 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
8544 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8545 }
8546 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8547 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8548 } while (fVars);
8549 }
8550
8551 /* Finally, check that the host register allocations matches. */
8552 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
8553 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
8554 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
8555 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
8556 }
8557
8558 /*
8559 * Define the endif label and maybe the else one if we're still in the 'if' part.
8560 */
8561 if (!pEntry->fInElse)
8562 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
8563 else
8564 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
8565 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
8566
8567 /* Pop the conditional stack. */
8568 pReNative->cCondDepth -= 1;
8569
8570 return off;
8571}
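
/* Example of the reconciliation above (descriptive only): if the if-block
   ended with guest RAX shadowed in a host register but the else-block did
   not, that shadow copy is simply dropped at the endif so both paths join
   with the same allocator state; only a mismatch in the host register
   allocation bitmap itself is treated as fatal and triggers the longjmp. */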
8572
8573
8574#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
8575 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
8576 do {
8577
8578/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
8579DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
8580{
8581 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8582
8583 /* Get the eflags. */
8584 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8585 kIemNativeGstRegUse_ReadOnly);
8586
8587 /* Test and jump. */
8588 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
8589
8590 /* Free but don't flush the EFlags register. */
8591 iemNativeRegFreeTmp(pReNative, idxEflReg);
8592
8593 /* Make a copy of the core state now as we start the if-block. */
8594 iemNativeCondStartIfBlock(pReNative, off);
8595
8596 return off;
8597}
8598
8599
8600#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
8601 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
8602 do {
8603
8604/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
8605DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
8606{
8607 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8608
8609 /* Get the eflags. */
8610 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8611 kIemNativeGstRegUse_ReadOnly);
8612
8613 /* Test and jump. */
8614 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
8615
8616 /* Free but don't flush the EFlags register. */
8617 iemNativeRegFreeTmp(pReNative, idxEflReg);
8618
8619 /* Make a copy of the core state now as we start the if-block. */
8620 iemNativeCondStartIfBlock(pReNative, off);
8621
8622 return off;
8623}
8624
8625
8626#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
8627 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
8628 do {
8629
8630/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
8631DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
8632{
8633 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8634
8635 /* Get the eflags. */
8636 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8637 kIemNativeGstRegUse_ReadOnly);
8638
8639 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8640 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8641
8642 /* Test and jump. */
8643 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8644
8645 /* Free but don't flush the EFlags register. */
8646 iemNativeRegFreeTmp(pReNative, idxEflReg);
8647
8648 /* Make a copy of the core state now as we start the if-block. */
8649 iemNativeCondStartIfBlock(pReNative, off);
8650
8651 return off;
8652}
8653
8654
8655#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
8656 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
8657 do {
8658
8659/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
8660DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
8661{
8662 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8663
8664 /* Get the eflags. */
8665 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8666 kIemNativeGstRegUse_ReadOnly);
8667
8668 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8669 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8670
8671 /* Test and jump. */
8672 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8673
8674 /* Free but don't flush the EFlags register. */
8675 iemNativeRegFreeTmp(pReNative, idxEflReg);
8676
8677 /* Make a copy of the core state now as we start the if-block. */
8678 iemNativeCondStartIfBlock(pReNative, off);
8679
8680 return off;
8681}
8682
8683
8684#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
8685 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
8686 do {
8687
8688#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
8689 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
8690 do {
8691
8692/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
8693DECL_INLINE_THROW(uint32_t)
8694iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8695 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
8696{
8697 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8698
8699 /* Get the eflags. */
8700 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8701 kIemNativeGstRegUse_ReadOnly);
8702
8703 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
8704 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
8705
8706 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
8707 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
8708 Assert(iBitNo1 != iBitNo2);
8709
8710#ifdef RT_ARCH_AMD64
8711 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
8712
8713 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8714 if (iBitNo1 > iBitNo2)
8715 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
8716 else
8717 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
8718 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8719
8720#elif defined(RT_ARCH_ARM64)
8721 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8722 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8723
8724 /* and tmpreg, eflreg, #1<<iBitNo1 */
8725 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
8726
8727 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
8728 if (iBitNo1 > iBitNo2)
8729 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8730 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
8731 else
8732 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8733 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
8734
8735 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8736
8737#else
8738# error "Port me"
8739#endif
8740
8741 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
8742 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
8743 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
8744
8745 /* Free but don't flush the EFlags and tmp registers. */
8746 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8747 iemNativeRegFreeTmp(pReNative, idxEflReg);
8748
8749 /* Make a copy of the core state now as we start the if-block. */
8750 iemNativeCondStartIfBlock(pReNative, off);
8751
8752 return off;
8753}
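
/* Worked example (descriptive only): for IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF,
   X86_EFL_OF) we get iBitNo1=7 and iBitNo2=11.  The code isolates SF, shifts
   it left by 4 so it lines up with OF and XORs the result with EFLAGS; bit 11
   of the temporary register is then SF^OF, i.e. set exactly when the two
   flags differ, which is what the final bit-test branches on. */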
8754
8755
8756#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
8757 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
8758 do {
8759
8760#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
8761 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
8762 do {
8763
8764/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
8765 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
8766DECL_INLINE_THROW(uint32_t)
8767iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
8768 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
8769{
8770 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8771
8772 /* We need an if-block label for the non-inverted variant. */
8773 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
8774 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
8775
8776 /* Get the eflags. */
8777 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8778 kIemNativeGstRegUse_ReadOnly);
8779
8780 /* Translate the flag masks to bit numbers. */
8781 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8782 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8783
8784 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
8785 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
8786 Assert(iBitNo1 != iBitNo);
8787
8788 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
8789 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
8790 Assert(iBitNo2 != iBitNo);
8791 Assert(iBitNo2 != iBitNo1);
8792
8793#ifdef RT_ARCH_AMD64
8794 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
8795#elif defined(RT_ARCH_ARM64)
8796 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8797#endif
8798
8799 /* Check for the lone bit first. */
8800 if (!fInverted)
8801 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8802 else
8803 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
8804
8805 /* Then extract and compare the other two bits. */
8806#ifdef RT_ARCH_AMD64
8807 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8808 if (iBitNo1 > iBitNo2)
8809 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
8810 else
8811 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
8812 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8813
8814#elif defined(RT_ARCH_ARM64)
8815 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8816
8817 /* and tmpreg, eflreg, #1<<iBitNo1 */
8818 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
8819
8820 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
8821 if (iBitNo1 > iBitNo2)
8822 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8823 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
8824 else
8825 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8826 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
8827
8828 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8829
8830#else
8831# error "Port me"
8832#endif
8833
8834 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
8835 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
8836 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
8837
8838 /* Free but don't flush the EFlags and tmp registers. */
8839 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8840 iemNativeRegFreeTmp(pReNative, idxEflReg);
8841
8842 /* Make a copy of the core state now as we start the if-block. */
8843 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
8844
8845 return off;
8846}
8847
8848
8849#define IEM_MC_IF_CX_IS_NZ() \
8850 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
8851 do {
8852
8853/** Emits code for IEM_MC_IF_CX_IS_NZ. */
8854DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8855{
8856 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8857
8858 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8859 kIemNativeGstRegUse_ReadOnly);
8860 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
8861 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8862
8863 iemNativeCondStartIfBlock(pReNative, off);
8864 return off;
8865}
8866
8867
8868#define IEM_MC_IF_ECX_IS_NZ() \
8869 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
8870 do {
8871
8872#define IEM_MC_IF_RCX_IS_NZ() \
8873 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
8874 do {
8875
8876/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
8877DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
8878{
8879 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8880
8881 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8882 kIemNativeGstRegUse_ReadOnly);
8883 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
8884 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8885
8886 iemNativeCondStartIfBlock(pReNative, off);
8887 return off;
8888}
8889
8890
8891#define IEM_MC_IF_CX_IS_NOT_ONE() \
8892 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
8893 do {
8894
8895/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
8896DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8897{
8898 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8899
8900 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8901 kIemNativeGstRegUse_ReadOnly);
8902#ifdef RT_ARCH_AMD64
8903 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8904#else
8905 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8906 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
8907 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8908#endif
8909 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8910
8911 iemNativeCondStartIfBlock(pReNative, off);
8912 return off;
8913}
8914
8915
8916#define IEM_MC_IF_ECX_IS_NOT_ONE() \
8917 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
8918 do {
8919
8920#define IEM_MC_IF_RCX_IS_NOT_ONE() \
8921 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
8922 do {
8923
8924/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
8925DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
8926{
8927 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8928
8929 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8930 kIemNativeGstRegUse_ReadOnly);
8931 if (f64Bit)
8932 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8933 else
8934 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8935 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8936
8937 iemNativeCondStartIfBlock(pReNative, off);
8938 return off;
8939}
8940
8941
8942#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
8943 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
8944 do {
8945
8946#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
8947 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
8948 do {
8949
8950/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
8951 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
8952DECL_INLINE_THROW(uint32_t)
8953iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
8954{
8955 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8956
8957 /* We have to load both RCX and EFLAGS before we can start branching,
8958 otherwise we'll end up in the else-block with an inconsistent
8959 register allocator state.
8960 Doing EFLAGS first as it's more likely to be loaded, right? */
8961 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8962 kIemNativeGstRegUse_ReadOnly);
8963 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8964 kIemNativeGstRegUse_ReadOnly);
8965
8966 /** @todo we could reduce this to a single branch instruction by spending a
8967 * temporary register and some setnz stuff. Not sure if loops are
8968 * worth it. */
8969 /* Check CX. */
8970#ifdef RT_ARCH_AMD64
8971 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8972#else
8973 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8974 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
8975 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8976#endif
8977
8978 /* Check the EFlags bit. */
8979 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8980 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8981 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
8982 !fCheckIfSet /*fJmpIfSet*/);
8983
8984 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8985 iemNativeRegFreeTmp(pReNative, idxEflReg);
8986
8987 iemNativeCondStartIfBlock(pReNative, off);
8988 return off;
8989}
8990
8991
8992#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
8993 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
8994 do {
8995
8996#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
8997 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
8998 do {
8999
9000#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
9001 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
9002 do {
9003
9004#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
9005 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
9006 do {
9007
9008/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
9009 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
9010 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
9011 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
9012DECL_INLINE_THROW(uint32_t)
9013iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
9014 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
9015{
9016 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
9017
9018 /* We have to load both RCX and EFLAGS before we can start branching,
9019 otherwise we'll end up in the else-block with an inconsistent
9020 register allocator state.
9021 Doing EFLAGS first as it's more likely to be loaded, right? */
9022 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
9023 kIemNativeGstRegUse_ReadOnly);
9024 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
9025 kIemNativeGstRegUse_ReadOnly);
9026
9027 /** @todo we could reduce this to a single branch instruction by spending a
9028 * temporary register and some setnz stuff. Not sure if loops are
9029 * worth it. */
9030 /* Check RCX/ECX. */
9031 if (f64Bit)
9032 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
9033 else
9034 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
9035
9036 /* Check the EFlags bit. */
9037 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
9038 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
9039 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
9040 !fCheckIfSet /*fJmpIfSet*/);
9041
9042 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
9043 iemNativeRegFreeTmp(pReNative, idxEflReg);
9044
9045 iemNativeCondStartIfBlock(pReNative, off);
9046 return off;
9047}
9048
9049
9050
9051/*********************************************************************************************************************************
9052* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
9053*********************************************************************************************************************************/
9054/** Number of hidden arguments for CIMPL calls.
9055 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
9056#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
9057# define IEM_CIMPL_HIDDEN_ARGS 3
9058#else
9059# define IEM_CIMPL_HIDDEN_ARGS 2
9060#endif
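
/* Orientation note (assumption, not taken from this file): the hidden
   arguments are the ones every C-impl helper receives before the MC-level
   ones -- the pVCpu pointer and the instruction length -- plus an extra slot
   on Windows/AMD64 when VBOXSTRICTRC is returned via a hidden buffer, which
   is what the 3 vs 2 distinction above reflects. */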
9061
9062#define IEM_MC_NOREF(a_Name) \
9063 RT_NOREF_PV(a_Name)
9064
9065#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
9066 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
9067
9068#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
9069 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
9070
9071#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
9072 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
9073
9074#define IEM_MC_LOCAL(a_Type, a_Name) \
9075 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
9076
9077#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
9078 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
9079
9080
9081/**
9082 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
9083 */
9084DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
9085{
9086 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
9087 return IEM_CIMPL_HIDDEN_ARGS;
9088 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
9089 return 1;
9090 return 0;
9091}
9092
9093
9094/**
9095 * Internal work that allocates a variable with kind set to
9096 * kIemNativeVarKind_Invalid and no current stack allocation.
9097 *
9098 * The kind will either be set by the caller or later when the variable is first
9099 * assigned a value.
9100 *
9101 * @returns Unpacked index.
9102 * @internal
9103 */
9104static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
9105{
9106 Assert(cbType > 0 && cbType <= 64);
9107 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
9108 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
9109 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
9110 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
9111 pReNative->Core.aVars[idxVar].cbVar = cbType;
9112 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
9113 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
9114 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
9115 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
9116 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
9117 pReNative->Core.aVars[idxVar].fRegAcquired = false;
9118 pReNative->Core.aVars[idxVar].u.uValue = 0;
9119 return idxVar;
9120}
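
/* Note: the index returned here is the raw array index ("unpacked"); the
   public allocators below wrap it with IEMNATIVE_VAR_IDX_PACK() before
   handing it out and consumers convert back via IEMNATIVE_VAR_IDX_UNPACK(),
   which is why the doxygen comments in this file call out packed vs unpacked
   explicitly. */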
9121
9122
9123/**
9124 * Internal work that allocates an argument variable w/o setting enmKind.
9125 *
9126 * @returns Unpacked index.
9127 * @internal
9128 */
9129static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
9130{
9131 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
9132 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
9133 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
9134
9135 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
9136 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
9137 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
9138 return idxVar;
9139}
9140
9141
9142/**
9143 * Gets the stack slot for a stack variable, allocating one if necessary.
9144 *
9145 * Calling this function implies that the stack slot will contain a valid
9146 * variable value. The caller deals with any register currently assigned to the
9147 * variable, typically by spilling it into the stack slot.
9148 *
9149 * @returns The stack slot number.
9150 * @param pReNative The recompiler state.
9151 * @param idxVar The variable.
9152 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
9153 */
9154DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9155{
9156 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9157 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9158 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
9159
9160 /* Already got a slot? */
9161 uint8_t const idxStackSlot = pVar->idxStackSlot;
9162 if (idxStackSlot != UINT8_MAX)
9163 {
9164 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
9165 return idxStackSlot;
9166 }
9167
9168 /*
9169 * A single slot is easy to allocate.
9170 * Allocate them from the top end, closest to BP, to reduce the displacement.
9171 */
9172 if (pVar->cbVar <= sizeof(uint64_t))
9173 {
9174 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
9175 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9176 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
9177 pVar->idxStackSlot = (uint8_t)iSlot;
9178 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
9179 return (uint8_t)iSlot;
9180 }
9181
9182 /*
9183 * We need more than one stack slot.
9184 *
9185 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
9186 */
9187 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
9188 Assert(pVar->cbVar <= 64);
9189 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
9190 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
9191 uint32_t bmStack = ~pReNative->Core.bmStack;
9192 while (bmStack != UINT32_MAX)
9193 {
9194/** @todo allocate from the top to reduce BP displacement. */
9195 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
9196 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9197 if (!(iSlot & fBitAlignMask))
9198 {
9199 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
9200 {
9201 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
9202 pVar->idxStackSlot = (uint8_t)iSlot;
9203 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
9204 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
9205 return (uint8_t)iSlot;
9206 }
9207 }
9208 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
9209 }
9210 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9211}
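
/* Worked example for the multi-slot path above (descriptive only): a 32 byte
   variable needs four 8-byte slots, giving fBitAllocMask = 0xf and
   fBitAlignMask = 3, i.e. the first slot index must be a multiple of four and
   all four corresponding bits in bmStack must be free before they're claimed
   in one go. */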
9212
9213
9214/**
9215 * Changes the variable to a stack variable.
9216 *
9217 * Currently this is only possible to do the first time the variable is used;
9218 * switching later can be implemented but isn't done.
9219 *
9220 * @param pReNative The recompiler state.
9221 * @param idxVar The variable.
9222 * @throws VERR_IEM_VAR_IPE_2
9223 */
9224static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9225{
9226 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9227 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9228 if (pVar->enmKind != kIemNativeVarKind_Stack)
9229 {
9230 /* We could in theory transition from immediate to stack as well, but it
9231 would involve the caller doing work storing the value on the stack. So,
9232 till that's required we only allow transition from invalid. */
9233 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9234 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9235 pVar->enmKind = kIemNativeVarKind_Stack;
9236
9237 /* Note! We don't allocate a stack slot here, that's only done when a
9238 slot is actually needed to hold a variable value. */
9239 }
9240}
9241
9242
9243/**
9244 * Sets it to a variable with a constant value.
9245 *
9246 * This does not require stack storage as we know the value and can always
9247 * reload it, unless of course it's referenced.
9248 *
9249 * @param pReNative The recompiler state.
9250 * @param idxVar The variable.
9251 * @param uValue The immediate value.
9252 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
9253 */
9254static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
9255{
9256 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9257 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9258 if (pVar->enmKind != kIemNativeVarKind_Immediate)
9259 {
9260 /* Only simple transitions for now. */
9261 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9262 pVar->enmKind = kIemNativeVarKind_Immediate;
9263 }
9264 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9265
9266 pVar->u.uValue = uValue;
9267 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
9268 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
9269 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
9270}
9271
9272
9273/**
9274 * Sets the variable to a reference (pointer) to @a idxOtherVar.
9275 *
9276 * This does not require stack storage as we know the value and can always
9277 * reload it. Loading is postponed till needed.
9278 *
9279 * @param pReNative The recompiler state.
9280 * @param idxVar The variable. Unpacked.
9281 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
9282 *
9283 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
9284 * @internal
9285 */
9286static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
9287{
9288 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
9289 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
9290
9291 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
9292 {
9293 /* Only simple transitions for now. */
9294 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
9295 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9296 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
9297 }
9298 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9299
9300 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
9301
9302 /* Update the other variable, ensure it's a stack variable. */
9303 /** @todo handle variables with const values... that'll go boom now. */
9304 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
9305 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9306}
9307
9308
9309/**
9310 * Sets the variable to a reference (pointer) to a guest register reference.
9311 *
9312 * This does not require stack storage as we know the value and can always
9313 * reload it. Loading is postponed till needed.
9314 *
9315 * @param pReNative The recompiler state.
9316 * @param idxVar The variable.
9317 * @param enmRegClass The class guest registers to reference.
9318 * @param idxReg The register within @a enmRegClass to reference.
9319 *
9320 * @throws VERR_IEM_VAR_IPE_2
9321 */
9322static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
9323 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
9324{
9325 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9326 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9327
9328 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
9329 {
9330 /* Only simple transitions for now. */
9331 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9332 pVar->enmKind = kIemNativeVarKind_GstRegRef;
9333 }
9334 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9335
9336 pVar->u.GstRegRef.enmClass = enmRegClass;
9337 pVar->u.GstRegRef.idx = idxReg;
9338}
9339
9340
9341DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
9342{
9343 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
9344}
9345
9346
9347DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
9348{
9349 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
9350
9351 /* Since we're using a generic uint64_t value type, we must truncate it if
9352 the variable is smaller, otherwise we may end up with too large a value when
9353 scaling up an imm8 w/ sign-extension.
9354
9355 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
9356 in the bios, bx=1) when running on arm, because clang expects 16-bit
9357 register parameters to have bits 16 and up set to zero. Instead of
9358 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
9359 CF value in the result. */
9360 switch (cbType)
9361 {
9362 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
9363 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
9364 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
9365 }
9366 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
9367 return idxVar;
9368}
9369
9370
9371DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
9372{
9373 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
9374 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
9375 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
9376 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
9377 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
9378 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
9379
9380 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
9381 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
9382 return idxArgVar;
9383}
9384
9385
9386DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
9387{
9388 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
9389 /* Don't set to stack now, leave that to the first use as for instance
9390 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
9391 return idxVar;
9392}
9393
9394
9395DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
9396{
9397 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
9398
9399 /* Since we're using a generic uint64_t value type, we must truncate it if
9400 the variable is smaller, otherwise we may end up with too large a value when
9401 scaling up an imm8 w/ sign-extension. */
9402 switch (cbType)
9403 {
9404 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
9405 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
9406 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
9407 }
9408 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
9409 return idxVar;
9410}
9411
9412
9413/**
9414 * Makes sure variable @a idxVar has a register assigned to it and that it stays
9415 * fixed till we call iemNativeVarRegisterRelease.
9416 *
9417 * @returns The host register number.
9418 * @param pReNative The recompiler state.
9419 * @param idxVar The variable.
9420 * @param poff Pointer to the instruction buffer offset.
9421 * In case a register needs to be freed up or the value
9422 * loaded off the stack.
9423 * @param fInitialized Set if the variable must already have been initialized.
9424 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
9425 * the case.
9426 * @param idxRegPref Preferred register number or UINT8_MAX.
9427 */
9428DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
9429 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
9430{
9431 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9432 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9433 Assert(pVar->cbVar <= 8);
9434 Assert(!pVar->fRegAcquired);
9435
9436 uint8_t idxReg = pVar->idxReg;
9437 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9438 {
9439 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
9440 && pVar->enmKind < kIemNativeVarKind_End);
9441 pVar->fRegAcquired = true;
9442 return idxReg;
9443 }
9444
9445 /*
9446 * If the kind of variable has not yet been set, default to 'stack'.
9447 */
9448 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
9449 && pVar->enmKind < kIemNativeVarKind_End);
9450 if (pVar->enmKind == kIemNativeVarKind_Invalid)
9451 iemNativeVarSetKindToStack(pReNative, idxVar);
9452
9453 /*
9454 * We have to allocate a register for the variable, even if it's a stack one,
9455 * as we don't know if there are modifications being made to it before it's
9456 * finalized (todo: analyze and insert hints about that?).
9457 *
9458 * If we can, we try to get the correct register for argument variables. This
9459 * is assuming that most argument variables are fetched as close as possible
9460 * to the actual call, so that there aren't any interfering hidden calls
9461 * (memory accesses, etc.) in between.
9462 *
9463 * If we cannot, or it's a local variable, we make sure no argument registers
9464 * that will be used by this MC block are allocated here, and we always
9465 * prefer non-volatile registers to avoid needing to spill stuff for internal
9466 * calls.
9467 */
9468 /** @todo Detect too early argument value fetches and warn about hidden
9469 * calls causing less optimal code to be generated in the python script. */
9470
9471 uint8_t const uArgNo = pVar->uArgNo;
9472 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
9473 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
9474 {
9475 idxReg = g_aidxIemNativeCallRegs[uArgNo];
9476 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
9477 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
9478 }
9479 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
9480 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
9481 {
9482 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
9483 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
9484 & ~pReNative->Core.bmHstRegsWithGstShadow
9485 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
9486 & fNotArgsMask;
9487 if (fRegs)
9488 {
9489 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
9490 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
9491 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
9492 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
9493 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
9494 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
9495 }
9496 else
9497 {
9498 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
9499 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
9500 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
9501 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
9502 }
9503 }
9504 else
9505 {
9506 idxReg = idxRegPref;
9507 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
9508 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
9509 }
9510 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
9511 pVar->idxReg = idxReg;
9512
9513 /*
9514 * Load it off the stack if we've got a stack slot.
9515 */
9516 uint8_t const idxStackSlot = pVar->idxStackSlot;
9517 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
9518 {
9519 Assert(fInitialized);
9520 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
9521 switch (pVar->cbVar)
9522 {
9523 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
9524 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
9525 case 3: AssertFailed(); RT_FALL_THRU();
9526 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
9527 default: AssertFailed(); RT_FALL_THRU();
9528 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
9529 }
9530 }
9531 else
9532 {
9533 Assert(idxStackSlot == UINT8_MAX);
9534 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9535 }
9536 pVar->fRegAcquired = true;
9537 return idxReg;
9538}
9539
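/*
 * Illustrative only: a minimal sketch of the acquire/use/release protocol this
 * function is part of, mirroring what the IEM_MC_FETCH_GREG_XXX emitters further
 * down in this file do.  The idxDstVar/idxGstFullReg names are placeholders.
 */
#if 0
    iemNativeVarSetKindToStack(pReNative, idxDstVar);
    uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg); /* example payload */
    iemNativeVarRegisterRelease(pReNative, idxDstVar); /* the register stays fixed until this call */
#endif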
9540
9541/**
9542 * The value of variable @a idxVar will be written in full to the @a enmGstReg
9543 * guest register.
9544 *
9545 * This function makes sure there is a register for it and sets it to be the
9546 * current shadow copy of @a enmGstReg.
9547 *
9548 * @returns The host register number.
9549 * @param pReNative The recompiler state.
9550 * @param idxVar The variable.
9551 * @param enmGstReg The guest register this variable will be written to
9552 * after this call.
9553 * @param poff Pointer to the instruction buffer offset.
9554 * In case a register needs to be freed up or if the
9555 * variable content needs to be loaded off the stack.
9556 *
9557 * @note We DO NOT expect @a idxVar to be an argument variable, because we
9558 * can only be in the commit stage of an instruction when this
9559 * function is used.
9560 */
9561DECL_HIDDEN_THROW(uint8_t)
9562iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
9563{
9564 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9565 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9566 Assert(!pVar->fRegAcquired);
9567 AssertMsgStmt( pVar->cbVar <= 8
9568 && ( pVar->enmKind == kIemNativeVarKind_Immediate
9569 || pVar->enmKind == kIemNativeVarKind_Stack),
9570 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
9571 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
9572 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
9573
9574 /*
9575 * This shouldn't ever be used for arguments, unless it's in a weird else
9576 * branch that doesn't do any calling and even then it's questionable.
9577 *
9578 * However, in case someone writes crazy wrong MC code and does register
9579 * updates before making calls, just use the regular register allocator to
9580 * ensure we get a register suitable for the intended argument number.
9581 */
9582 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
9583
9584 /*
9585 * If there is already a register for the variable, we transfer/set the
9586 * guest shadow copy assignment to it.
9587 */
9588 uint8_t idxReg = pVar->idxReg;
9589 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9590 {
9591 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
9592 {
9593 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
9594 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
9595 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
9596 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
9597 }
9598 else
9599 {
9600 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
9601 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
9602 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
9603 }
9604 /** @todo figure this one out. We need some way of making sure the register isn't
9605 * modified after this point, just in case we start writing crappy MC code. */
9606 pVar->enmGstReg = enmGstReg;
9607 pVar->fRegAcquired = true;
9608 return idxReg;
9609 }
9610 Assert(pVar->uArgNo == UINT8_MAX);
9611
9612 /*
9613 * Because this is supposed to be the commit stage, we just tag along with the
9614 * temporary register allocator and upgrade it to a variable register.
9615 */
9616 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
9617 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
9618 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
9619 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
9620 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
9621 pVar->idxReg = idxReg;
9622
9623 /*
9624 * Now we need to load the register value.
9625 */
9626 if (pVar->enmKind == kIemNativeVarKind_Immediate)
9627 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
9628 else
9629 {
9630 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9631 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
9632 switch (pVar->cbVar)
9633 {
9634 case sizeof(uint64_t):
9635 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
9636 break;
9637 case sizeof(uint32_t):
9638 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
9639 break;
9640 case sizeof(uint16_t):
9641 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
9642 break;
9643 case sizeof(uint8_t):
9644 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
9645 break;
9646 default:
9647 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
9648 }
9649 }
9650
9651 pVar->fRegAcquired = true;
9652 return idxReg;
9653}
9654
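/*
 * Illustrative only: a rough sketch of how a commit-stage (store) emitter might
 * use this.  The idxValueVar/iGReg names and the store step are placeholders,
 * not actual emitters from this file.
 */
#if 0
    uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar,
                                                                     IEMNATIVEGSTREG_GPR(iGReg), &off);
    /* ... emit the store of idxVarReg into the guest register in CPUMCTX here ... */
    iemNativeVarRegisterRelease(pReNative, idxValueVar);
#endif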
9655
9656/**
9657 * Sets the host register for @a idxVarRc to @a idxReg.
9658 *
9659 * The register must not be allocated. Any guest register shadowing will be
9660 * implicitly dropped by this call.
9661 *
9662 * The variable must not have any register associated with it (causes
9663 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
9664 * implied.
9665 *
9666 * @returns idxReg
9667 * @param pReNative The recompiler state.
9668 * @param idxVar The variable.
9669 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
9670 * @param off For recording in debug info.
9671 *
9672 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
9673 */
9674DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
9675{
9676 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9677 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9678 Assert(!pVar->fRegAcquired);
9679 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
9680 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
9681 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
9682
9683 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
9684 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
9685
9686 iemNativeVarSetKindToStack(pReNative, idxVar);
9687 pVar->idxReg = idxReg;
9688
9689 return idxReg;
9690}
9691
9692
9693/**
9694 * A convenience wrapper around iemNativeVarRegisterSet() that also marks the register as acquired.
9695 */
9696DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
9697 uint8_t idxReg, uint32_t *poff)
9698{
9699 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
9700 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
9701 return idxReg;
9702}
9703
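/*
 * Illustrative only: the typical use of iemNativeVarRegisterSet() is to bind a
 * call's return value to a variable, as iemNativeEmitCallAImplCommon() further
 * down does with IEMNATIVE_CALL_RET_GREG right after iemNativeEmitCallImm().
 */
#if 0
    off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
    iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
#endif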
9704
9705/**
9706 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
9707 *
9708 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
9709 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
9710 * requirement of flushing anything in volatile host registers when making a
9711 * call.
9712 *
9713 * @returns New @a off value.
9714 * @param pReNative The recompiler state.
9715 * @param off The code buffer position.
9716 * @param fHstRegsNotToSave Set of registers not to save & restore.
9717 */
9718DECL_HIDDEN_THROW(uint32_t)
9719iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
9720{
9721 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
9722 if (fHstRegs)
9723 {
9724 do
9725 {
9726 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
9727 fHstRegs &= ~RT_BIT_32(idxHstReg);
9728
9729 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
9730 {
9731 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
9732 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9733 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
9734 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
9735 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
9736 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
9737 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
9738 {
9739 case kIemNativeVarKind_Stack:
9740 {
9741 /* Temporarily spill the variable register. */
9742 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9743 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
9744 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9745 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
9746 continue;
9747 }
9748
9749 case kIemNativeVarKind_Immediate:
9750 case kIemNativeVarKind_VarRef:
9751 case kIemNativeVarKind_GstRegRef:
9752 /* It is weird to have any of these loaded at this point. */
9753 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
9754 continue;
9755
9756 case kIemNativeVarKind_End:
9757 case kIemNativeVarKind_Invalid:
9758 break;
9759 }
9760 AssertFailed();
9761 }
9762 else
9763 {
9764 /*
9765 * Allocate a temporary stack slot and spill the register to it.
9766 */
9767 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
9768 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
9769 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9770 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
9771 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
9772 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
9773 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9774 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
9775 }
9776 } while (fHstRegs);
9777 }
9778 return off;
9779}
9780
9781
9782/**
9783 * Emit code to restore volatile registers after a call to a helper.
9784 *
9785 * @returns New @a off value.
9786 * @param pReNative The recompiler state.
9787 * @param off The code buffer position.
9788 * @param fHstRegsNotToSave Set of registers not to save & restore.
9789 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
9790 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
9791 */
9792DECL_HIDDEN_THROW(uint32_t)
9793iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
9794{
9795 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
9796 if (fHstRegs)
9797 {
9798 do
9799 {
9800 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
9801 fHstRegs &= ~RT_BIT_32(idxHstReg);
9802
9803 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
9804 {
9805 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
9806 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9807 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
9808 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
9809 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
9810 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
9811 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
9812 {
9813 case kIemNativeVarKind_Stack:
9814 {
9815 /* Unspill the variable register. */
9816 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9817 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
9818 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9819 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
9820 continue;
9821 }
9822
9823 case kIemNativeVarKind_Immediate:
9824 case kIemNativeVarKind_VarRef:
9825 case kIemNativeVarKind_GstRegRef:
9826 /* It is weird to have any of these loaded at this point. */
9827 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
9828 continue;
9829
9830 case kIemNativeVarKind_End:
9831 case kIemNativeVarKind_Invalid:
9832 break;
9833 }
9834 AssertFailed();
9835 }
9836 else
9837 {
9838 /*
9839 * Restore from temporary stack slot.
9840 */
9841 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
9842 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
9843 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
9844 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
9845
9846 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
9847 }
9848 } while (fHstRegs);
9849 }
9850 return off;
9851}
9852
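/*
 * Illustrative only: a minimal sketch of how the save/restore pair brackets a
 * helper call (e.g. on a TLB miss).  The pfnHelper pointer and the argument
 * loading in the middle are placeholders.
 */
#if 0
    off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
    /* ... load the helper arguments into the call registers ... */
    off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
    off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
#endif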
9853
9854/**
9855 * Worker that frees the stack slots of variable @a idxVar, if any are allocated.
9856 *
9857 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
9858 *
9859 * ASSUMES that @a idxVar is valid and unpacked.
9860 */
9861DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9862{
9863 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
9864 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
9865 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
9866 {
9867 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
9868 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
9869 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
9870 Assert(cSlots > 0);
9871 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
9872 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
9873 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
9874 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
9875 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
9876 }
9877 else
9878 Assert(idxStackSlot == UINT8_MAX);
9879}
9880
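/*
 * Illustrative only: the slot mask arithmetic used above, worked through for a
 * hypothetical 16-byte variable occupying two 8-byte stack slots.
 */
#if 0
    uint8_t const  cbVar      = 16;                                                /* hypothetical */
    uint8_t const  cSlots     = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t); /* = 2 */
    uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);                /* = 0x3, shifted by idxStackSlot when clearing bmStack */
#endif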
9881
9882/**
9883 * Worker that frees a single variable.
9884 *
9885 * ASSUMES that @a idxVar is valid and unpacked.
9886 */
9887DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9888{
9889 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
9890 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
9891 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
9892
9893 /* Free the host register first if any assigned. */
9894 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
9895 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9896 {
9897 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
9898 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
9899 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
9900 }
9901
9902 /* Free argument mapping. */
9903 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
9904 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
9905 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
9906
9907 /* Free the stack slots. */
9908 iemNativeVarFreeStackSlots(pReNative, idxVar);
9909
9910 /* Free the actual variable. */
9911 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
9912 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
9913}
9914
9915
9916/**
9917 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
9918 */
9919DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
9920{
9921 while (bmVars != 0)
9922 {
9923 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
9924 bmVars &= ~RT_BIT_32(idxVar);
9925
9926#if 1 /** @todo optimize by simplifying this later... */
9927 iemNativeVarFreeOneWorker(pReNative, idxVar);
9928#else
9929 /* Only need to free the host register, the rest is done as bulk updates below. */
9930 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
9931 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9932 {
9933 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
9934 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
9935 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
9936 }
9937#endif
9938 }
9939#if 0 /** @todo optimize by simplifying this later... */
9940 pReNative->Core.bmVars = 0;
9941 pReNative->Core.bmStack = 0;
9942 pReNative->Core.u64ArgVars = UINT64_MAX;
9943#endif
9944}
9945
9946
9947/**
9948 * This is called by IEM_MC_END() to clean up all variables.
9949 */
9950DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
9951{
9952 uint32_t const bmVars = pReNative->Core.bmVars;
9953 if (bmVars != 0)
9954 iemNativeVarFreeAllSlow(pReNative, bmVars);
9955 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
9956 Assert(pReNative->Core.bmStack == 0);
9957}
9958
9959
9960#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
9961
9962/**
9963 * This is called by IEM_MC_FREE_LOCAL.
9964 */
9965DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9966{
9967 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9968 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
9969 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
9970}
9971
9972
9973#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
9974
9975/**
9976 * This is called by IEM_MC_FREE_ARG.
9977 */
9978DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9979{
9980 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9981 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
9982 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
9983}
9984
9985
9986#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
9987
9988/**
9989 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
9990 */
9991DECL_INLINE_THROW(uint32_t)
9992iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
9993{
9994 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
9995 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
9996 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9997 Assert( pVarDst->cbVar == sizeof(uint16_t)
9998 || pVarDst->cbVar == sizeof(uint32_t));
9999
10000 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
10001 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
10002 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
10003 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
10004 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10005
10006 Assert(pVarDst->cbVar < pVarSrc->cbVar);
10007
10008 /*
10009 * Special case for immediates.
10010 */
10011 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
10012 {
10013 switch (pVarDst->cbVar)
10014 {
10015 case sizeof(uint16_t):
10016 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
10017 break;
10018 case sizeof(uint32_t):
10019 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
10020 break;
10021 default: AssertFailed(); break;
10022 }
10023 }
10024 else
10025 {
10026 /*
10027 * The generic solution for now.
10028 */
10029 /** @todo optimize this by having the python script make sure the source
10030 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
10031 * statement. Then we could just transfer the register assignments. */
10032 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
10033 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
10034 switch (pVarDst->cbVar)
10035 {
10036 case sizeof(uint16_t):
10037 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
10038 break;
10039 case sizeof(uint32_t):
10040 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
10041 break;
10042 default: AssertFailed(); break;
10043 }
10044 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
10045 iemNativeVarRegisterRelease(pReNative, idxVarDst);
10046 }
10047 return off;
10048}
10049
10050
10051
10052/*********************************************************************************************************************************
10053* Emitters for IEM_MC_CALL_CIMPL_XXX *
10054*********************************************************************************************************************************/
10055
10056/**
10057 * Emits code to load a reference to the given guest register into @a idxGprDst.
10058 */
10059DECL_INLINE_THROW(uint32_t)
10060iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
10061 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
10062{
10063#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
10064 /** @todo If we're ever going to allow referencing the RIP register, we need to update the guest value here. */
10065#endif
10066
10067 /*
10068 * Get the offset relative to the CPUMCTX structure.
10069 */
10070 uint32_t offCpumCtx;
10071 switch (enmClass)
10072 {
10073 case kIemNativeGstRegRef_Gpr:
10074 Assert(idxRegInClass < 16);
10075 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
10076 break;
10077
10078 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
10079 Assert(idxRegInClass < 4);
10080 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
10081 break;
10082
10083 case kIemNativeGstRegRef_EFlags:
10084 Assert(idxRegInClass == 0);
10085 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
10086 break;
10087
10088 case kIemNativeGstRegRef_MxCsr:
10089 Assert(idxRegInClass == 0);
10090 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
10091 break;
10092
10093 case kIemNativeGstRegRef_FpuReg:
10094 Assert(idxRegInClass < 8);
10095 AssertFailed(); /** @todo what kind of indexing? */
10096 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
10097 break;
10098
10099 case kIemNativeGstRegRef_MReg:
10100 Assert(idxRegInClass < 8);
10101 AssertFailed(); /** @todo what kind of indexing? */
10102 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
10103 break;
10104
10105 case kIemNativeGstRegRef_XReg:
10106 Assert(idxRegInClass < 16);
10107 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
10108 break;
10109
10110 default:
10111 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
10112 }
10113
10114 /*
10115 * Load the value into the destination register.
10116 */
10117#ifdef RT_ARCH_AMD64
10118 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
10119
10120#elif defined(RT_ARCH_ARM64)
10121 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
10122 Assert(offCpumCtx < 4096);
10123 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
10124
10125#else
10126# error "Port me!"
10127#endif
10128
10129 return off;
10130}
10131
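/*
 * Illustrative only: this is how the GstRegRef case in iemNativeEmitCallCommon()
 * below materializes a reference argument; the register class and X86_GREG_xAX
 * index are just example values.
 */
#if 0
    off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
                                         kIemNativeGstRegRef_Gpr, X86_GREG_xAX);
#endif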
10132
10133/**
10134 * Common code for CIMPL and AIMPL calls.
10135 *
10136 * These are calls that use argument variables and such. They should not be
10137 * confused with internal calls required to implement an MC operation,
10138 * such as a TLB load or similar.
10139 *
10140 * Upon return all that is left to do is to load any hidden arguments and
10141 * perform the call. All argument variables are freed.
10142 *
10143 * @returns New code buffer offset; throws VBox status code on error.
10144 * @param pReNative The native recompile state.
10145 * @param off The code buffer offset.
10146 * @param cArgs The total number of arguments (including the
10147 * hidden ones).
10148 * @param cHiddenArgs The number of hidden arguments. The hidden
10149 * arguments must not have any variable declared for
10150 * them, whereas all the regular arguments must
10151 * (tstIEMCheckMc ensures this).
10152 */
10153DECL_HIDDEN_THROW(uint32_t)
10154iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
10155{
10156#ifdef VBOX_STRICT
10157 /*
10158 * Assert sanity.
10159 */
10160 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
10161 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
10162 for (unsigned i = 0; i < cHiddenArgs; i++)
10163 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
10164 for (unsigned i = cHiddenArgs; i < cArgs; i++)
10165 {
10166 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
10167 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
10168 }
10169 iemNativeRegAssertSanity(pReNative);
10170#endif
10171
10172 /* We don't know what the called function makes use of, so flush any pending register writes. */
10173 off = iemNativeRegFlushPendingWrites(pReNative, off);
10174
10175 /*
10176 * Before we do anything else, go over variables that are referenced and
10177 * make sure they are not in a register.
10178 */
10179 uint32_t bmVars = pReNative->Core.bmVars;
10180 if (bmVars)
10181 {
10182 do
10183 {
10184 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
10185 bmVars &= ~RT_BIT_32(idxVar);
10186
10187 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
10188 {
10189 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
10190 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
10191 {
10192 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
10193 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
10194 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
10195 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
10196 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
10197
10198 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
10199 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
10200 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
10201 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
10202 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
10203 }
10204 }
10205 } while (bmVars != 0);
10206#if 0 //def VBOX_STRICT
10207 iemNativeRegAssertSanity(pReNative);
10208#endif
10209 }
10210
10211 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
10212
10213 /*
10214 * First, go over the host registers that will be used for arguments and make
10215 * sure they either hold the desired argument or are free.
10216 */
10217 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
10218 {
10219 for (uint32_t i = 0; i < cRegArgs; i++)
10220 {
10221 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
10222 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
10223 {
10224 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
10225 {
10226 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
10227 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
10228 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
10229 Assert(pVar->idxReg == idxArgReg);
10230 uint8_t const uArgNo = pVar->uArgNo;
10231 if (uArgNo == i)
10232 { /* perfect */ }
10233 /* The variable allocator logic should make sure this is impossible,
10234 except for when the return register is used as a parameter (ARM,
10235 but not x86). */
10236#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
10237 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
10238 {
10239# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
10240# error "Implement this"
10241# endif
10242 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
10243 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
10244 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
10245 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
10246 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
10247 }
10248#endif
10249 else
10250 {
10251 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
10252
10253 if (pVar->enmKind == kIemNativeVarKind_Stack)
10254 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
10255 else
10256 {
10257 /* just free it, can be reloaded if used again */
10258 pVar->idxReg = UINT8_MAX;
10259 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
10260 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
10261 }
10262 }
10263 }
10264 else
10265 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
10266 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
10267 }
10268 }
10269#if 0 //def VBOX_STRICT
10270 iemNativeRegAssertSanity(pReNative);
10271#endif
10272 }
10273
10274 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
10275
10276#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
10277 /*
10278 * If there are any stack arguments, make sure they are in their place as well.
10279 *
10280 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
10281 * the caller) will be loading it later and it must be free (see the first loop).
10282 */
10283 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
10284 {
10285 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
10286 {
10287 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
10288 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
10289 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
10290 {
10291 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
10292 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
10293 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
10294 pVar->idxReg = UINT8_MAX;
10295 }
10296 else
10297 {
10298 /* Use ARG0 as temp for stuff we need registers for. */
10299 switch (pVar->enmKind)
10300 {
10301 case kIemNativeVarKind_Stack:
10302 {
10303 uint8_t const idxStackSlot = pVar->idxStackSlot;
10304 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10305 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
10306 iemNativeStackCalcBpDisp(idxStackSlot));
10307 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10308 continue;
10309 }
10310
10311 case kIemNativeVarKind_Immediate:
10312 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
10313 continue;
10314
10315 case kIemNativeVarKind_VarRef:
10316 {
10317 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
10318 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
10319 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
10320 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
10321 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
10322 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
10323 {
10324 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
10325 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
10326 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10327 }
10328 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
10329 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10330 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
10331 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10332 continue;
10333 }
10334
10335 case kIemNativeVarKind_GstRegRef:
10336 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
10337 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
10338 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10339 continue;
10340
10341 case kIemNativeVarKind_Invalid:
10342 case kIemNativeVarKind_End:
10343 break;
10344 }
10345 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
10346 }
10347 }
10348# if 0 //def VBOX_STRICT
10349 iemNativeRegAssertSanity(pReNative);
10350# endif
10351 }
10352#else
10353 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
10354#endif
10355
10356 /*
10357 * Make sure the argument variables are loaded into their respective registers.
10358 *
10359 * We can optimize this by ASSUMING that any register allocations are for
10360 * registers that have already been loaded and are ready. The previous step
10361 * saw to that.
10362 */
10363 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
10364 {
10365 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
10366 {
10367 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
10368 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
10369 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
10370 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
10371 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
10372 else
10373 {
10374 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
10375 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
10376 {
10377 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
10378 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
10379 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
10380 | RT_BIT_32(idxArgReg);
10381 pVar->idxReg = idxArgReg;
10382 }
10383 else
10384 {
10385 /* Use ARG0 as temp for stuff we need registers for. */
10386 switch (pVar->enmKind)
10387 {
10388 case kIemNativeVarKind_Stack:
10389 {
10390 uint8_t const idxStackSlot = pVar->idxStackSlot;
10391 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10392 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
10393 continue;
10394 }
10395
10396 case kIemNativeVarKind_Immediate:
10397 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
10398 continue;
10399
10400 case kIemNativeVarKind_VarRef:
10401 {
10402 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
10403 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
10404 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
10405 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
10406 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
10407 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
10408 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
10409 {
10410 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
10411 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
10412 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10413 }
10414 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
10415 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10416 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
10417 continue;
10418 }
10419
10420 case kIemNativeVarKind_GstRegRef:
10421 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
10422 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
10423 continue;
10424
10425 case kIemNativeVarKind_Invalid:
10426 case kIemNativeVarKind_End:
10427 break;
10428 }
10429 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
10430 }
10431 }
10432 }
10433#if 0 //def VBOX_STRICT
10434 iemNativeRegAssertSanity(pReNative);
10435#endif
10436 }
10437#ifdef VBOX_STRICT
10438 else
10439 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
10440 {
10441 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
10442 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
10443 }
10444#endif
10445
10446 /*
10447 * Free all argument variables (simplified).
10448 * Their lifetime always expires with the call they are for.
10449 */
10450 /** @todo Make the python script check that arguments aren't used after
10451 * IEM_MC_CALL_XXXX. */
10452 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
10453 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
10454 * an argument value. There is also some FPU stuff. */
10455 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
10456 {
10457 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
10458 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
10459
10460 /* no need to free registers: */
10461 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
10462 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
10463 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
10464 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
10465 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
10466 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
10467
10468 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
10469 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
10470 iemNativeVarFreeStackSlots(pReNative, idxVar);
10471 }
10472 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
10473
10474 /*
10475 * Flush volatile registers as we make the call.
10476 */
10477 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
10478
10479 return off;
10480}
10481
10482
10483/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
10484DECL_HIDDEN_THROW(uint32_t)
10485iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
10486 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
10487
10488{
10489 /*
10490 * Do all the call setup and cleanup.
10491 */
10492 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
10493
10494 /*
10495 * Load the two or three hidden arguments.
10496 */
10497#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
10498 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
10499 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10500 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
10501#else
10502 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10503 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
10504#endif
10505
10506 /*
10507 * Make the call and check the return code.
10508 *
10509 * Shadow PC copies are always flushed here; other stuff depends on the flags.
10510 * Segment and general purpose registers are explicitly flushed via the
10511 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
10512 * macros.
10513 */
10514 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
10515#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
10516 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
10517#endif
10518 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
10519 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
10520 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
10521 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
10522
10523 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
10524}
10525
10526
10527#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
10528 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
10529
10530/** Emits code for IEM_MC_CALL_CIMPL_1. */
10531DECL_INLINE_THROW(uint32_t)
10532iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10533 uintptr_t pfnCImpl, uint8_t idxArg0)
10534{
10535 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10536 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
10537}
10538
10539
10540#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
10541 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
10542
10543/** Emits code for IEM_MC_CALL_CIMPL_2. */
10544DECL_INLINE_THROW(uint32_t)
10545iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10546 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
10547{
10548 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10549 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10550 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
10551}
10552
10553
10554#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
10555 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10556 (uintptr_t)a_pfnCImpl, a0, a1, a2)
10557
10558/** Emits code for IEM_MC_CALL_CIMPL_3. */
10559DECL_INLINE_THROW(uint32_t)
10560iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10561 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10562{
10563 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10564 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10565 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10566 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
10567}
10568
10569
10570#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
10571 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10572 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
10573
10574/** Emits code for IEM_MC_CALL_CIMPL_4. */
10575DECL_INLINE_THROW(uint32_t)
10576iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10577 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
10578{
10579 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10580 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10581 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10582 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
10583 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
10584}
10585
10586
10587#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
10588 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10589 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
10590
10591/** Emits code for IEM_MC_CALL_CIMPL_5. */
10592DECL_INLINE_THROW(uint32_t)
10593iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10594 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
10595{
10596 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10597 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10598 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10599 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
10600 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
10601 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
10602}
10603
10604
10605/** Recompiler debugging: Flush guest register shadow copies. */
10606#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
10607
10608
10609
10610/*********************************************************************************************************************************
10611* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
10612*********************************************************************************************************************************/
10613
10614/**
10615 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
10616 */
10617DECL_INLINE_THROW(uint32_t)
10618iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10619 uintptr_t pfnAImpl, uint8_t cArgs)
10620{
10621 if (idxVarRc != UINT8_MAX)
10622 {
10623 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
10624 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
10625 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
10626 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
10627 }
10628
10629 /*
10630 * Do all the call setup and cleanup.
10631 */
10632 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
10633
10634 /*
10635 * Make the call and update the return code variable if we've got one.
10636 */
10637 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
10638 if (idxVarRc != UINT8_MAX)
10639 {
10640 off = iemNativeEmitBrk(pReNative, off, 0x4222); /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
10641 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
10642 }
10643
10644 return off;
10645}
10646
10647
10648
10649#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
10650 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
10651
10652#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
10653 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
10654
10655/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
10656DECL_INLINE_THROW(uint32_t)
10657iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
10658{
10659 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
10660}
10661
10662
10663#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
10664 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
10665
10666#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
10667 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
10668
10669/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
10670DECL_INLINE_THROW(uint32_t)
10671iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
10672{
10673 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10674 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
10675}
10676
10677
10678#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
10679 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
10680
10681#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
10682 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
10683
10684/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
10685DECL_INLINE_THROW(uint32_t)
10686iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10687 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10688{
10689 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10690 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10691 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
10692}
10693
10694
10695#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
10696 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
10697
10698#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
10699 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
10700
10701/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
10702DECL_INLINE_THROW(uint32_t)
10703iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10704 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10705{
10706 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10707 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10708 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
10709 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
10710}
10711
10712
10713#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
10714 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
10715
10716#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
10717 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
10718
10719/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
10720DECL_INLINE_THROW(uint32_t)
10721iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10722 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
10723{
10724 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10725 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10726 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
10727 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
10728 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
10729}
10730
10731
10732
10733/*********************************************************************************************************************************
10734* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
10735*********************************************************************************************************************************/
10736
10737#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
10738 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
10739
10740#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
10741 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
10742
10743#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
10744 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
10745
10746#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
10747 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
10748
10749
10750/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
10751 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
10752DECL_INLINE_THROW(uint32_t)
10753iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
10754{
10755 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10756 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10757 Assert(iGRegEx < 20);
10758
10759 /* Same discussion as in iemNativeEmitFetchGregU16 */
10760 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
10761 kIemNativeGstRegUse_ReadOnly);
10762
10763 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10764 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10765
10766 /* The value is zero-extended to the full 64-bit host register width. */
10767 if (iGRegEx < 16)
10768 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10769 else
10770 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
10771
10772 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10773 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10774 return off;
10775}
10776
10777
10778#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
10779 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
10780
10781#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
10782 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
10783
10784#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
10785 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
10786
10787/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
10788DECL_INLINE_THROW(uint32_t)
10789iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
10790{
10791 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10792 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
10793 Assert(iGRegEx < 20);
10794
10795 /* Same discussion as in iemNativeEmitFetchGregU16 */
10796 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
10797 kIemNativeGstRegUse_ReadOnly);
10798
10799 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10800 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10801
10802 if (iGRegEx < 16)
10803 {
10804 switch (cbSignExtended)
10805 {
10806 case sizeof(uint16_t):
10807 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10808 break;
10809 case sizeof(uint32_t):
10810 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10811 break;
10812 case sizeof(uint64_t):
10813 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10814 break;
10815 default: AssertFailed(); break;
10816 }
10817 }
10818 else
10819 {
10820 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
10821 switch (cbSignExtended)
10822 {
10823 case sizeof(uint16_t):
10824 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10825 break;
10826 case sizeof(uint32_t):
10827 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10828 break;
10829 case sizeof(uint64_t):
10830 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10831 break;
10832 default: AssertFailed(); break;
10833 }
10834 }
10835
10836 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10837 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10838 return off;
10839}
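
/* For the high-byte registers (iGRegEx 16..19) the value is first copied from
   bits 15:8 down into bits 7:0 and then sign-extended in place, presumably
   because there is no dedicated emitter that sign-extends straight from the
   high byte. */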
10840
10841
10842
10843#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
10844 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
10845
10846#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
10847 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
10848
10849#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
10850 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
10851
10852/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
10853DECL_INLINE_THROW(uint32_t)
10854iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
10855{
10856 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10857 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10858 Assert(iGReg < 16);
10859
10860 /*
10861 * We can either just load the low 16-bit of the GPR into a host register
10862 * for the variable, or we can do so via a shadow copy host register. The
10863 * latter will avoid having to reload it if it's being stored later, but
10864 * will waste a host register if it isn't touched again. Since we don't
10865     * know what's going to happen, we choose the latter for now.
10866 */
10867 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10868 kIemNativeGstRegUse_ReadOnly);
10869
10870 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10871 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10872 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10873 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10874
10875 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10876 return off;
10877}
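
/* Since IEM_MC_FETCH_GREG_U16 and its _ZX_U32/_ZX_U64 variants all map to the
   emitter above and cbZeroExtended is only consumed by the size assertion, the
   16-bit load must already leave the value zero-extended in the full host
   register. */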
10878
10879
10880#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
10881 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
10882
10883#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
10884 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
10885
10886/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
10887DECL_INLINE_THROW(uint32_t)
10888iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
10889{
10890 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10891 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
10892 Assert(iGReg < 16);
10893
10894 /*
10895 * We can either just load the low 16-bit of the GPR into a host register
10896 * for the variable, or we can do so via a shadow copy host register. The
10897 * latter will avoid having to reload it if it's being stored later, but
10898 * will waste a host register if it isn't touched again. Since we don't
10899     * know what's going to happen, we choose the latter for now.
10900 */
10901 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10902 kIemNativeGstRegUse_ReadOnly);
10903
10904 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10905 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10906 if (cbSignExtended == sizeof(uint32_t))
10907 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10908 else
10909 {
10910 Assert(cbSignExtended == sizeof(uint64_t));
10911 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10912 }
10913 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10914
10915 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10916 return off;
10917}
10918
10919
10920#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
10921 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
10922
10923#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
10924 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
10925
10926/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
10927DECL_INLINE_THROW(uint32_t)
10928iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
10929{
10930 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10931 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10932 Assert(iGReg < 16);
10933
10934 /*
10935     * We can either just load the low 32-bit of the GPR into a host register
10936 * for the variable, or we can do so via a shadow copy host register. The
10937 * latter will avoid having to reload it if it's being stored later, but
10938 * will waste a host register if it isn't touched again. Since we don't
10939     * know what's going to happen, we choose the latter for now.
10940 */
10941 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10942 kIemNativeGstRegUse_ReadOnly);
10943
10944 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10945 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10946 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
10947 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10948
10949 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10950 return off;
10951}
10952
10953
10954#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
10955 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
10956
10957/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
10958DECL_INLINE_THROW(uint32_t)
10959iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
10960{
10961 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10962 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10963 Assert(iGReg < 16);
10964
10965 /*
10966 * We can either just load the low 32-bit of the GPR into a host register
10967 * for the variable, or we can do so via a shadow copy host register. The
10968 * latter will avoid having to reload it if it's being stored later, but
10969 * will waste a host register if it isn't touched again. Since we don't
10970     * know what's going to happen, we choose the latter for now.
10971 */
10972 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10973 kIemNativeGstRegUse_ReadOnly);
10974
10975 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10976 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10977 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
10978 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10979
10980 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10981 return off;
10982}
10983
10984
10985#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
10986 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
10987
10988#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
10989 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
10990
10991/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
10992 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
10993DECL_INLINE_THROW(uint32_t)
10994iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
10995{
10996 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10997 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10998 Assert(iGReg < 16);
10999
11000 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11001 kIemNativeGstRegUse_ReadOnly);
11002
11003 iemNativeVarSetKindToStack(pReNative, idxDstVar);
11004 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
11005 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
11006 /** @todo name the register a shadow one already? */
11007 iemNativeVarRegisterRelease(pReNative, idxDstVar);
11008
11009 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
11010 return off;
11011}
11012
11013
11014
11015/*********************************************************************************************************************************
11016* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
11017*********************************************************************************************************************************/
11018
11019#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
11020 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
11021
11022/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
11023DECL_INLINE_THROW(uint32_t)
11024iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
11025{
11026 Assert(iGRegEx < 20);
11027 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
11028 kIemNativeGstRegUse_ForUpdate);
11029#ifdef RT_ARCH_AMD64
11030 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
11031
11032 /* To the lowest byte of the register: mov r8, imm8 */
11033 if (iGRegEx < 16)
11034 {
11035 if (idxGstTmpReg >= 8)
11036 pbCodeBuf[off++] = X86_OP_REX_B;
11037 else if (idxGstTmpReg >= 4)
11038 pbCodeBuf[off++] = X86_OP_REX;
11039 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
11040 pbCodeBuf[off++] = u8Value;
11041 }
11042    /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
11043 else if (idxGstTmpReg < 4)
11044 {
11045 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
11046 pbCodeBuf[off++] = u8Value;
11047 }
11048 else
11049 {
11050 /* ror reg64, 8 */
11051 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11052 pbCodeBuf[off++] = 0xc1;
11053 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11054 pbCodeBuf[off++] = 8;
11055
11056 /* mov reg8, imm8 */
11057 if (idxGstTmpReg >= 8)
11058 pbCodeBuf[off++] = X86_OP_REX_B;
11059 else if (idxGstTmpReg >= 4)
11060 pbCodeBuf[off++] = X86_OP_REX;
11061 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
11062 pbCodeBuf[off++] = u8Value;
11063
11064 /* rol reg64, 8 */
11065 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11066 pbCodeBuf[off++] = 0xc1;
11067 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11068 pbCodeBuf[off++] = 8;
11069 }
11070
11071#elif defined(RT_ARCH_ARM64)
11072 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
11073 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
11074 if (iGRegEx < 16)
11075 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
11076 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
11077 else
11078 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
11079 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
11080 iemNativeRegFreeTmp(pReNative, idxImmReg);
11081
11082#else
11083# error "Port me!"
11084#endif
11085
11086 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11087
11088 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
11089
11090 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11091 return off;
11092}
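
/* Rough sketch of the AMD64 fallback above when storing to a high-byte
   register (iGRegEx >= 16) whose guest shadow happens to live in, say, r9
   (hypothetical register choice):
        ror r9, 8
        mov r9b, u8Value
        rol r9, 8
   The target byte is rotated into the low position, patched and rotated back. */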
11093
11094
11095#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
11096 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
11097
11098/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
11099DECL_INLINE_THROW(uint32_t)
11100iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
11101{
11102 Assert(iGRegEx < 20);
11103 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11104
11105 /*
11106     * If it's a constant value (unlikely) we treat this as an
11107 * IEM_MC_STORE_GREG_U8_CONST statement.
11108 */
11109 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11110 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11111 { /* likely */ }
11112 else
11113 {
11114 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11115 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11116 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
11117 }
11118
11119 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
11120 kIemNativeGstRegUse_ForUpdate);
11121 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
11122
11123#ifdef RT_ARCH_AMD64
11124 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
11125 if (iGRegEx < 16)
11126 {
11127 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
11128 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
11129 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
11130 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
11131 pbCodeBuf[off++] = X86_OP_REX;
11132 pbCodeBuf[off++] = 0x8a;
11133 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
11134 }
11135    /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
11136 else if (idxGstTmpReg < 4 && idxVarReg < 4)
11137 {
11138 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
11139 pbCodeBuf[off++] = 0x8a;
11140 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
11141 }
11142 else
11143 {
11144 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
11145
11146 /* ror reg64, 8 */
11147 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11148 pbCodeBuf[off++] = 0xc1;
11149 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11150 pbCodeBuf[off++] = 8;
11151
11152 /* mov reg8, reg8(r/m) */
11153 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
11154 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
11155 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
11156 pbCodeBuf[off++] = X86_OP_REX;
11157 pbCodeBuf[off++] = 0x8a;
11158 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
11159
11160 /* rol reg64, 8 */
11161 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11162 pbCodeBuf[off++] = 0xc1;
11163 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11164 pbCodeBuf[off++] = 8;
11165 }
11166
11167#elif defined(RT_ARCH_ARM64)
11168 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
11169 or
11170 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
11171 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11172 if (iGRegEx < 16)
11173 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
11174 else
11175 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
11176
11177#else
11178# error "Port me!"
11179#endif
11180 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11181
11182 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11183
11184 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
11185 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11186 return off;
11187}
11188
11189
11190
11191#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
11192 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
11193
11194/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
11195DECL_INLINE_THROW(uint32_t)
11196iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
11197{
11198 Assert(iGReg < 16);
11199 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11200 kIemNativeGstRegUse_ForUpdate);
11201#ifdef RT_ARCH_AMD64
11202 /* mov reg16, imm16 */
11203 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
11204 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11205 if (idxGstTmpReg >= 8)
11206 pbCodeBuf[off++] = X86_OP_REX_B;
11207 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
11208 pbCodeBuf[off++] = RT_BYTE1(uValue);
11209 pbCodeBuf[off++] = RT_BYTE2(uValue);
11210
11211#elif defined(RT_ARCH_ARM64)
11212 /* movk xdst, #uValue, lsl #0 */
11213 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11214 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
11215
11216#else
11217# error "Port me!"
11218#endif
11219
11220 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11221
11222 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11223 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11224 return off;
11225}
11226
11227
11228#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
11229 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
11230
11231/** Emits code for IEM_MC_STORE_GREG_U16. */
11232DECL_INLINE_THROW(uint32_t)
11233iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11234{
11235 Assert(iGReg < 16);
11236 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11237
11238 /*
11239     * If it's a constant value (unlikely) we treat this as an
11240 * IEM_MC_STORE_GREG_U16_CONST statement.
11241 */
11242 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11243 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11244 { /* likely */ }
11245 else
11246 {
11247 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11248 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11249 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
11250 }
11251
11252 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11253 kIemNativeGstRegUse_ForUpdate);
11254
11255#ifdef RT_ARCH_AMD64
11256 /* mov reg16, reg16 or [mem16] */
11257 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
11258 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11259 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
11260 {
11261 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
11262 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
11263 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
11264 pbCodeBuf[off++] = 0x8b;
11265 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
11266 }
11267 else
11268 {
11269 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
11270 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
11271 if (idxGstTmpReg >= 8)
11272 pbCodeBuf[off++] = X86_OP_REX_R;
11273 pbCodeBuf[off++] = 0x8b;
11274 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
11275 }
11276
11277#elif defined(RT_ARCH_ARM64)
11278 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
11279 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
11280 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11281 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
11282 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11283
11284#else
11285# error "Port me!"
11286#endif
11287
11288 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11289
11290 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11291 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11292 return off;
11293}
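
/* On AMD64 the value variable is read either straight from its host register
   or, when it currently lives in a stack slot, via a frame-pointer relative
   16-bit load (iemNativeEmitGprByBpDisp); the ARM64 path instead acquires it
   into a register and merges it with BFI. */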
11294
11295
11296#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
11297 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
11298
11299/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
11300DECL_INLINE_THROW(uint32_t)
11301iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
11302{
11303 Assert(iGReg < 16);
11304 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11305 kIemNativeGstRegUse_ForFullWrite);
11306 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
11307 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11308 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11309 return off;
11310}
11311
11312
11313#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
11314 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
11315
11316/** Emits code for IEM_MC_STORE_GREG_U32. */
11317DECL_INLINE_THROW(uint32_t)
11318iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11319{
11320 Assert(iGReg < 16);
11321 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11322
11323 /*
11324     * If it's a constant value (unlikely) we treat this as an
11325 * IEM_MC_STORE_GREG_U32_CONST statement.
11326 */
11327 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11328 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11329 { /* likely */ }
11330 else
11331 {
11332 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11333 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11334 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
11335 }
11336
11337 /*
11338     * For the rest we allocate a guest register for the variable and write
11339 * it to the CPUMCTX structure.
11340 */
11341 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
11342 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11343#ifdef VBOX_STRICT
11344 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
11345#endif
11346 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11347 return off;
11348}
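
/* A 32-bit guest register write architecturally zero-extends into bits 63:32,
   which is why the variable's host register can be written back with a full
   64-bit store and, in strict builds, is first checked to have a clear top
   half. */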
11349
11350
11351#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
11352 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
11353
11354/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
11355DECL_INLINE_THROW(uint32_t)
11356iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
11357{
11358 Assert(iGReg < 16);
11359 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11360 kIemNativeGstRegUse_ForFullWrite);
11361 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
11362 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11363 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11364 return off;
11365}
11366
11367
11368#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
11369 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
11370
11371/** Emits code for IEM_MC_STORE_GREG_U64. */
11372DECL_INLINE_THROW(uint32_t)
11373iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11374{
11375 Assert(iGReg < 16);
11376 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11377
11378 /*
11379     * If it's a constant value (unlikely) we treat this as an
11380 * IEM_MC_STORE_GREG_U64_CONST statement.
11381 */
11382 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11383 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11384 { /* likely */ }
11385 else
11386 {
11387 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11388 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11389 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
11390 }
11391
11392 /*
11393     * For the rest we allocate a guest register for the variable and write
11394 * it to the CPUMCTX structure.
11395 */
11396 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
11397 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11398 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11399 return off;
11400}
11401
11402
11403#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
11404 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
11405
11406/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
11407DECL_INLINE_THROW(uint32_t)
11408iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
11409{
11410 Assert(iGReg < 16);
11411 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11412 kIemNativeGstRegUse_ForUpdate);
11413 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
11414 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11415 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11416 return off;
11417}
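
/* The 32-bit register-to-itself load above is presumably emitted as a plain
   32-bit register move, which zero-extends into bits 63:32 on both AMD64 and
   ARM64, so no explicit masking of the upper half is needed. */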
11418
11419
11420/*********************************************************************************************************************************
11421* General purpose register manipulation (add, sub). *
11422*********************************************************************************************************************************/
11423
11424#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
11425    off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
11426
11427/** Emits code for IEM_MC_ADD_GREG_U16. */
11428DECL_INLINE_THROW(uint32_t)
11429iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
11430{
11431 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11432 kIemNativeGstRegUse_ForUpdate);
11433
11434#ifdef RT_ARCH_AMD64
11435 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
11436 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11437 if (idxGstTmpReg >= 8)
11438 pbCodeBuf[off++] = X86_OP_REX_B;
11439 if (uAddend == 1)
11440 {
11441 pbCodeBuf[off++] = 0xff; /* inc */
11442 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11443 }
11444 else
11445 {
11446 pbCodeBuf[off++] = 0x81;
11447 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11448 pbCodeBuf[off++] = uAddend;
11449 pbCodeBuf[off++] = 0;
11450 }
11451
11452#else
11453 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
11454 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
11455
11456    /* add tmp, gstgrp, uAddend */
11457 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
11458
11459 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
11460    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
11461
11462 iemNativeRegFreeTmp(pReNative, idxTmpReg);
11463#endif
11464
11465 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11466
11467 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11468
11469 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11470 return off;
11471}
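
/* Rough sketch of the ARM64 sequence above (hypothetical register choices):
        add  w1, w0, #uAddend
        bfi  w0, w1, #0, #16
   Only the low 16 bits of the guest register are replaced, so the addition
   wraps at 16 bits while bits 63:16 stay untouched, matching 16-bit operand
   size semantics. */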
11472
11473
11474#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
11475 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
11476
11477#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
11478 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
11479
11480/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
11481DECL_INLINE_THROW(uint32_t)
11482iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
11483{
11484 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11485 kIemNativeGstRegUse_ForUpdate);
11486
11487#ifdef RT_ARCH_AMD64
11488 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
11489 if (f64Bit)
11490 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
11491 else if (idxGstTmpReg >= 8)
11492 pbCodeBuf[off++] = X86_OP_REX_B;
11493 if (uAddend == 1)
11494 {
11495 pbCodeBuf[off++] = 0xff; /* inc */
11496 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11497 }
11498 else if (uAddend < 128)
11499 {
11500 pbCodeBuf[off++] = 0x83; /* add */
11501 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11502 pbCodeBuf[off++] = RT_BYTE1(uAddend);
11503 }
11504 else
11505 {
11506 pbCodeBuf[off++] = 0x81; /* add */
11507 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11508 pbCodeBuf[off++] = RT_BYTE1(uAddend);
11509 pbCodeBuf[off++] = 0;
11510 pbCodeBuf[off++] = 0;
11511 pbCodeBuf[off++] = 0;
11512 }
11513
11514#else
11515    /* add gstgrp, gstgrp, uAddend */
11516 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11517 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
11518
11519#endif
11520
11521 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11522
11523 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11524
11525 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11526 return off;
11527}
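
/* The AMD64 path above picks the shortest encoding: INC (FF /0) for an addend
   of 1, the sign-extended imm8 form (83 /0 ib) while the addend stays below
   128 so sign extension cannot flip it negative, and otherwise the imm32 form
   (81 /0 id) with the upper three immediate bytes zeroed. */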
11528
11529
11530
11531#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
11532 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
11533
11534/** Emits code for IEM_MC_SUB_GREG_U16. */
11535DECL_INLINE_THROW(uint32_t)
11536iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
11537{
11538 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11539 kIemNativeGstRegUse_ForUpdate);
11540
11541#ifdef RT_ARCH_AMD64
11542 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
11543 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11544 if (idxGstTmpReg >= 8)
11545 pbCodeBuf[off++] = X86_OP_REX_B;
11546 if (uSubtrahend == 1)
11547 {
11548 pbCodeBuf[off++] = 0xff; /* dec */
11549 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11550 }
11551 else
11552 {
11553 pbCodeBuf[off++] = 0x81;
11554 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11555 pbCodeBuf[off++] = uSubtrahend;
11556 pbCodeBuf[off++] = 0;
11557 }
11558
11559#else
11560 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
11561 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
11562
11563 /* sub tmp, gstgrp, uSubtrahend */
11564 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
11565
11566 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
11567    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
11568
11569 iemNativeRegFreeTmp(pReNative, idxTmpReg);
11570#endif
11571
11572 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11573
11574 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11575
11576 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11577 return off;
11578}
11579
11580
11581#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
11582 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
11583
11584#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
11585 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
11586
11587/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
11588DECL_INLINE_THROW(uint32_t)
11589iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
11590{
11591 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11592 kIemNativeGstRegUse_ForUpdate);
11593
11594#ifdef RT_ARCH_AMD64
11595 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
11596 if (f64Bit)
11597 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
11598 else if (idxGstTmpReg >= 8)
11599 pbCodeBuf[off++] = X86_OP_REX_B;
11600 if (uSubtrahend == 1)
11601 {
11602 pbCodeBuf[off++] = 0xff; /* dec */
11603 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11604 }
11605 else if (uSubtrahend < 128)
11606 {
11607 pbCodeBuf[off++] = 0x83; /* sub */
11608 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11609 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
11610 }
11611 else
11612 {
11613 pbCodeBuf[off++] = 0x81; /* sub */
11614 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11615 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
11616 pbCodeBuf[off++] = 0;
11617 pbCodeBuf[off++] = 0;
11618 pbCodeBuf[off++] = 0;
11619 }
11620
11621#else
11622    /* sub gstgrp, gstgrp, uSubtrahend */
11623 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11624 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
11625
11626#endif
11627
11628 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11629
11630 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11631
11632 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11633 return off;
11634}
11635
11636
11637/*********************************************************************************************************************************
11638* Local variable manipulation (add, sub, and, or). *
11639*********************************************************************************************************************************/
11640
11641#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
11642 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
11643
11644#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
11645 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
11646
11647#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
11648 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
11649
11650#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
11651 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
11652
11653/** Emits code for AND'ing a local and a constant value. */
11654DECL_INLINE_THROW(uint32_t)
11655iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
11656{
11657#ifdef VBOX_STRICT
11658 switch (cbMask)
11659 {
11660 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
11661 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
11662 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
11663 case sizeof(uint64_t): break;
11664 default: AssertFailedBreak();
11665 }
11666#endif
11667
11668 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11669 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
11670
11671 if (cbMask <= sizeof(uint32_t))
11672 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
11673 else
11674 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
11675
11676 iemNativeVarRegisterRelease(pReNative, idxVar);
11677 return off;
11678}
11679
11680
11681#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
11682 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
11683
11684#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
11685 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
11686
11687#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
11688 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
11689
11690#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
11691 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
11692
11693/** Emits code for OR'ing a local and a constant value. */
11694DECL_INLINE_THROW(uint32_t)
11695iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
11696{
11697#ifdef VBOX_STRICT
11698 switch (cbMask)
11699 {
11700 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
11701 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
11702 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
11703 case sizeof(uint64_t): break;
11704 default: AssertFailedBreak();
11705 }
11706#endif
11707
11708 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11709 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
11710
11711 if (cbMask <= sizeof(uint32_t))
11712 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
11713 else
11714 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
11715
11716 iemNativeVarRegisterRelease(pReNative, idxVar);
11717 return off;
11718}
11719
11720
11721#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
11722 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
11723
11724#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
11725 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
11726
11727#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
11728 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
11729
11730/** Emits code for reversing the byte order in a local value. */
11731DECL_INLINE_THROW(uint32_t)
11732iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
11733{
11734 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11735 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
11736
11737 switch (cbLocal)
11738 {
11739 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
11740 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
11741 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
11742 default: AssertFailedBreak();
11743 }
11744
11745 iemNativeVarRegisterRelease(pReNative, idxVar);
11746 return off;
11747}
11748
11749
11750
11751/*********************************************************************************************************************************
11752* EFLAGS *
11753*********************************************************************************************************************************/
11754
11755#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
11756# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
11757#else
11758# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
11759 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
11760
11761DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
11762{
11763 if (fEflOutput)
11764 {
11765 PVMCPUCC const pVCpu = pReNative->pVCpu;
11766# ifndef IEMLIVENESS_EXTENDED_LAYOUT
11767 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
11768 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
11769 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
11770# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
11771 if (fEflOutput & (a_fEfl)) \
11772 { \
11773 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
11774 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
11775 else \
11776 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
11777 } else do { } while (0)
11778# else
11779 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
11780 IEMLIVENESSBIT const LivenessClobbered =
11781 {
11782 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
11783 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
11784 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
11785 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
11786 };
11787 IEMLIVENESSBIT const LivenessDelayable =
11788 {
11789 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
11790 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
11791 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
11792 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
11793 };
11794# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
11795 if (fEflOutput & (a_fEfl)) \
11796 { \
11797 if (LivenessClobbered.a_fLivenessMember) \
11798 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
11799 else if (LivenessDelayable.a_fLivenessMember) \
11800 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
11801 else \
11802 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
11803 } else do { } while (0)
11804# endif
11805 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
11806 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
11807 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
11808 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
11809 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
11810 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
11811 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
11812# undef CHECK_FLAG_AND_UPDATE_STATS
11813 }
11814 RT_NOREF(fEflInput);
11815}
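
/* Roughly: a flag counts as "skippable" when the liveness info says it is
   written without any prior read, potential exception/call or other access;
   "delayable" when only a potential exception/call path might still need it;
   everything else is "required".  The non-extended layout only distinguishes
   required vs. skippable. */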
11816#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
11817
11818#undef IEM_MC_FETCH_EFLAGS /* should not be used */
11819#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
11820 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
11821
11822/** Handles IEM_MC_FETCH_EFLAGS_EX. */
11823DECL_INLINE_THROW(uint32_t)
11824iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
11825 uint32_t fEflInput, uint32_t fEflOutput)
11826{
11827 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
11828 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
11829 RT_NOREF(fEflInput, fEflOutput);
11830
11831#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
11832# ifdef VBOX_STRICT
11833 if ( pReNative->idxCurCall != 0
11834 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
11835 {
11836 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
11837 uint32_t const fBoth = fEflInput | fEflOutput;
11838# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
11839 AssertMsg( !(fBoth & (a_fElfConst)) \
11840 || (!(fEflInput & (a_fElfConst)) \
11841 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
11842 : !(fEflOutput & (a_fElfConst)) \
11843 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
11844 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
11845 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
11846 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
11847 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
11848 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
11849 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
11850 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
11851 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
11852 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
11853# undef ASSERT_ONE_EFL
11854 }
11855# endif
11856#endif
11857
11858    /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
11859 * the existing shadow copy. */
11860 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
11861 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
11862 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
11863 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
11864 return off;
11865}
11866
11867
11868
11869/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
11870 * start using it with custom native code emission (inlining assembly
11871 * instruction helpers). */
11872#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
11873#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
11874 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
11875 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput)
11876
11877/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
11878DECL_INLINE_THROW(uint32_t)
11879iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput)
11880{
11881 RT_NOREF(fEflOutput);
11882 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
11883 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
11884
11885#ifdef VBOX_STRICT
11886 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
11887 uint32_t offFixup = off;
11888 off = iemNativeEmitJnzToFixed(pReNative, off, off);
11889 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
11890 iemNativeFixupFixedJump(pReNative, offFixup, off);
11891
11892 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
11893 offFixup = off;
11894 off = iemNativeEmitJzToFixed(pReNative, off, off);
11895 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
11896 iemNativeFixupFixedJump(pReNative, offFixup, off);
11897
11898    /** @todo validate that only bits in the fEflOutput mask changed. */
11899#endif
11900
11901 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
11902 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
11903 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
11904 return off;
11905}
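
/* The strict-build checks above trap obviously malformed EFLAGS values before
   they are committed: the 0x2001 breakpoint fires if the always-one bit
   (X86_EFL_RA1_MASK) is clear, the 0x2002 breakpoint if any reserved
   must-be-zero bits within the 32-bit hardware mask are set. */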
11906
11907
11908
11909/*********************************************************************************************************************************
11910* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
11911*********************************************************************************************************************************/
11912
11913#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
11914 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
11915
11916#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
11917 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
11918
11919#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
11920 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
11921
11922
11923/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
11924 * IEM_MC_FETCH_SREG_ZX_U64. */
11925DECL_INLINE_THROW(uint32_t)
11926iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
11927{
11928 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
11929 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
11930 Assert(iSReg < X86_SREG_COUNT);
11931
11932 /*
11933     * For now, we will not create a shadow copy of a selector.  The rationale
11934     * is that since we do not recompile the popping and loading of segment
11935     * registers, and the IEM_MC_FETCH_SREG_U* MCs are only used for pushing
11936     * and moving to registers, there is only a small chance that the shadow
11937     * copy will be accessed again before the register is reloaded.  One
11938     * scenario would be nested calls in 16-bit code, but I doubt it's worth
11939     * the extra register pressure atm.
11940     *
11941     * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
11942     * and iemNativeVarRegisterAcquire for a load scenario.  We only have the
11943     * store scenario covered at present (r160730).
11944 */
11945 iemNativeVarSetKindToStack(pReNative, idxDstVar);
11946 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
11947 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
11948 iemNativeVarRegisterRelease(pReNative, idxDstVar);
11949 return off;
11950}
11951
11952
11953
11954/*********************************************************************************************************************************
11955* Register references. *
11956*********************************************************************************************************************************/
11957
11958#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
11959 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
11960
11961#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
11962 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
11963
11964/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
11965DECL_INLINE_THROW(uint32_t)
11966iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
11967{
11968 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
11969 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
11970 Assert(iGRegEx < 20);
11971
11972 if (iGRegEx < 16)
11973 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
11974 else
11975 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
11976
11977 /* If we've delayed writing back the register value, flush it now. */
11978 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
11979
11980 /* If it's not a const reference we need to flush the shadow copy of the register now. */
11981 if (!fConst)
11982 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
11983
11984 return off;
11985}
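
/* Handing out a writable reference means the receiving helper may modify the
   guest register behind the recompiler's back, which is why the shadow copy is
   flushed above; a const reference can safely leave the shadow in place. */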
11986
11987#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
11988 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
11989
11990#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
11991 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
11992
11993#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
11994 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
11995
11996#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
11997 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
11998
11999#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
12000 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
12001
12002#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
12003 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
12004
12005#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
12006 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
12007
12008#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
12009 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
12010
12011#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
12012 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
12013
12014#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
12015 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
12016
12017/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
12018DECL_INLINE_THROW(uint32_t)
12019iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
12020{
12021 Assert(iGReg < 16);
12022 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
12023 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
12024
12025 /* If we've delayed writing back the register value, flush it now. */
12026 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
12027
12028 /* If it's not a const reference we need to flush the shadow copy of the register now. */
12029 if (!fConst)
12030 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
12031
12032 return off;
12033}
12034
12035
12036#undef IEM_MC_REF_EFLAGS /* should not be used. */
12037#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
12038 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
12039 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
12040
12041/** Handles IEM_MC_REF_EFLAGS. */
12042DECL_INLINE_THROW(uint32_t)
12043iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
12044{
12045 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
12046 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
12047
12048 /* If we've delayed writing back the register value, flush it now. */
12049 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
12050
12051 /* If there is a shadow copy of guest EFLAGS, flush it now. */
12052 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
12053
12054 return off;
12055}
12056
12057
12058/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
12059 *        different code from the threaded recompiler, maybe it would be helpful.  For now
12060 *        we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
12061#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
12062
12063
12064#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
12065 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
12066
12067#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
12068 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
12069
12070#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
12071 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
12072
12073/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
12074DECL_INLINE_THROW(uint32_t)
12075iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
12076{
12077 Assert(iXReg < 16);
12078 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
12079 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
12080
12081 /* If we've delayed writing back the register value, flush it now. */
12082 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
12083
12084#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
12085 /* If it's not a const reference we need to flush the shadow copy of the register now. */
12086 if (!fConst)
12087 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
12088#else
12089 RT_NOREF(fConst);
12090#endif
12091
12092 return off;
12093}
12094
12095
12096#define IEM_MC_REF_MXCSR(a_pfMxcsr) \
12097 off = iemNativeEmitRefMxcsr(pReNative, off, a_pfMxcsr)
12098
12099/** Handles IEM_MC_REF_MXCSR. */
12100DECL_INLINE_THROW(uint32_t)
12101iemNativeEmitRefMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
12102{
12103 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_MxCsr, 0);
12104 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
12105
12106 /* If we've delayed writing back the register value, flush it now. */
12107 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_MxCsr, 0);
12108
12109 /* If there is a shadow copy of guest MXCSR, flush it now. */
12110 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_MxCsr));
12111
12112 return off;
12113}
12114
12115
12116
12117/*********************************************************************************************************************************
12118* Effective Address Calculation *
12119*********************************************************************************************************************************/
12120#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
12121 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
12122
12123/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
12124 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
12125DECL_INLINE_THROW(uint32_t)
12126iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
12127 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
12128{
12129 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12130
12131 /*
12132 * Handle the disp16 form with no registers first.
12133 *
12134 * Convert to an immediate value, as that'll delay the register allocation
12135 * and assignment till the memory access / call / whatever and we can use
12136 * a more appropriate register (or none at all).
12137 */
12138 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
12139 {
12140 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
12141 return off;
12142 }
12143
12144 /* Determine the displacement. */
12145 uint16_t u16EffAddr;
12146 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12147 {
12148 case 0: u16EffAddr = 0; break;
12149 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
12150 case 2: u16EffAddr = u16Disp; break;
12151 default: AssertFailedStmt(u16EffAddr = 0);
12152 }
12153
12154 /* Determine the registers involved. */
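 /* 16-bit addressing forms: r/m 0=BX+SI, 1=BX+DI, 2=BP+SI, 3=BP+DI,
    4=SI, 5=DI, 6=BP (mod!=0; the mod=0 disp16 form was handled above), 7=BX. */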
12155 uint8_t idxGstRegBase;
12156 uint8_t idxGstRegIndex;
12157 switch (bRm & X86_MODRM_RM_MASK)
12158 {
12159 case 0:
12160 idxGstRegBase = X86_GREG_xBX;
12161 idxGstRegIndex = X86_GREG_xSI;
12162 break;
12163 case 1:
12164 idxGstRegBase = X86_GREG_xBX;
12165 idxGstRegIndex = X86_GREG_xDI;
12166 break;
12167 case 2:
12168 idxGstRegBase = X86_GREG_xBP;
12169 idxGstRegIndex = X86_GREG_xSI;
12170 break;
12171 case 3:
12172 idxGstRegBase = X86_GREG_xBP;
12173 idxGstRegIndex = X86_GREG_xDI;
12174 break;
12175 case 4:
12176 idxGstRegBase = X86_GREG_xSI;
12177 idxGstRegIndex = UINT8_MAX;
12178 break;
12179 case 5:
12180 idxGstRegBase = X86_GREG_xDI;
12181 idxGstRegIndex = UINT8_MAX;
12182 break;
12183 case 6:
12184 idxGstRegBase = X86_GREG_xBP;
12185 idxGstRegIndex = UINT8_MAX;
12186 break;
12187#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
12188 default:
12189#endif
12190 case 7:
12191 idxGstRegBase = X86_GREG_xBX;
12192 idxGstRegIndex = UINT8_MAX;
12193 break;
12194 }
12195
12196 /*
12197 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
12198 */
12199 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12200 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12201 kIemNativeGstRegUse_ReadOnly);
12202 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
12203 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12204 kIemNativeGstRegUse_ReadOnly)
12205 : UINT8_MAX;
12206#ifdef RT_ARCH_AMD64
12207 if (idxRegIndex == UINT8_MAX)
12208 {
12209 if (u16EffAddr == 0)
12210 {
12211 /* movzx ret, base */
12212 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
12213 }
12214 else
12215 {
12216 /* lea ret32, [base64 + disp32] */
12217 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12218 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12219 if (idxRegRet >= 8 || idxRegBase >= 8)
12220 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
12221 pbCodeBuf[off++] = 0x8d;
12222 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12223 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
12224 else
12225 {
12226 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
12227 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12228 }
12229 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
12230 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
12231 pbCodeBuf[off++] = 0;
12232 pbCodeBuf[off++] = 0;
12233 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12234
12235 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
12236 }
12237 }
12238 else
12239 {
12240 /* lea ret32, [index64 + base64 (+ disp32)] */
12241 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12242 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12243 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12244 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12245 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12246 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12247 pbCodeBuf[off++] = 0x8d;
12248 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
12249 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12250 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
12251 if (bMod == X86_MOD_MEM4)
12252 {
12253 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
12254 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
12255 pbCodeBuf[off++] = 0;
12256 pbCodeBuf[off++] = 0;
12257 }
12258 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12259 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
12260 }
12261
12262#elif defined(RT_ARCH_ARM64)
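 /* ARM64: do the additions with 32-bit (W register) operations and zero-extend
    the low 16 bits (UXTH) to get the 16-bit wrap-around semantics. */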
12263 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
12264 if (u16EffAddr == 0)
12265 {
12266 if (idxRegIndex == UINT8_MAX)
12267 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
12268 else
12269 {
12270 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
12271 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
12272 }
12273 }
12274 else
12275 {
12276 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
12277 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
12278 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
12279 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
12280 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
12281 else
12282 {
12283 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
12284 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
12285 }
12286 if (idxRegIndex != UINT8_MAX)
12287 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
12288 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
12289 }
12290
12291#else
12292# error "port me"
12293#endif
12294
12295 if (idxRegIndex != UINT8_MAX)
12296 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12297 iemNativeRegFreeTmp(pReNative, idxRegBase);
12298 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12299 return off;
12300}
12301
12302
12303#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
12304 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
12305
12306/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
12307 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
12308DECL_INLINE_THROW(uint32_t)
12309iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
12310 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
12311{
12312 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12313
12314 /*
12315 * Handle the disp32 form with no registers first.
12316 *
12317 * Convert to an immediate value, as that'll delay the register allocation
12318 * and assignment till the memory access / call / whatever and we can use
12319 * a more appropriate register (or none at all).
12320 */
12321 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
12322 {
12323 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
12324 return off;
12325 }
12326
12327 /* Calculate the fixed displacement (the SIB.B=4 and SIB.B=5 cases below adjust this further). */
12328 uint32_t u32EffAddr = 0;
12329 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12330 {
12331 case 0: break;
12332 case 1: u32EffAddr = (int8_t)u32Disp; break;
12333 case 2: u32EffAddr = u32Disp; break;
12334 default: AssertFailed();
12335 }
12336
12337 /* Get the register (or SIB) value. */
12338 uint8_t idxGstRegBase = UINT8_MAX;
12339 uint8_t idxGstRegIndex = UINT8_MAX;
12340 uint8_t cShiftIndex = 0;
12341 switch (bRm & X86_MODRM_RM_MASK)
12342 {
12343 case 0: idxGstRegBase = X86_GREG_xAX; break;
12344 case 1: idxGstRegBase = X86_GREG_xCX; break;
12345 case 2: idxGstRegBase = X86_GREG_xDX; break;
12346 case 3: idxGstRegBase = X86_GREG_xBX; break;
12347 case 4: /* SIB */
12348 {
12349 /* index w/ scaling. */
12350 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
12351 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
12352 {
12353 case 0: idxGstRegIndex = X86_GREG_xAX; break;
12354 case 1: idxGstRegIndex = X86_GREG_xCX; break;
12355 case 2: idxGstRegIndex = X86_GREG_xDX; break;
12356 case 3: idxGstRegIndex = X86_GREG_xBX; break;
12357 case 4: cShiftIndex = 0; /*no index*/ break;
12358 case 5: idxGstRegIndex = X86_GREG_xBP; break;
12359 case 6: idxGstRegIndex = X86_GREG_xSI; break;
12360 case 7: idxGstRegIndex = X86_GREG_xDI; break;
12361 }
12362
12363 /* base */
12364 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
12365 {
12366 case 0: idxGstRegBase = X86_GREG_xAX; break;
12367 case 1: idxGstRegBase = X86_GREG_xCX; break;
12368 case 2: idxGstRegBase = X86_GREG_xDX; break;
12369 case 3: idxGstRegBase = X86_GREG_xBX; break;
12370 case 4:
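 /* SIB.B=4: the base is rSP/ESP; bits 8 and up of uSibAndRspOffset carry the
    fixed offset for the 'pop [xSP]' case (see the 64-bit variant's docs). */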
12371 idxGstRegBase = X86_GREG_xSP;
12372 u32EffAddr += uSibAndRspOffset >> 8;
12373 break;
12374 case 5:
12375 if ((bRm & X86_MODRM_MOD_MASK) != 0)
12376 idxGstRegBase = X86_GREG_xBP;
12377 else
12378 {
12379 Assert(u32EffAddr == 0);
12380 u32EffAddr = u32Disp;
12381 }
12382 break;
12383 case 6: idxGstRegBase = X86_GREG_xSI; break;
12384 case 7: idxGstRegBase = X86_GREG_xDI; break;
12385 }
12386 break;
12387 }
12388 case 5: idxGstRegBase = X86_GREG_xBP; break;
12389 case 6: idxGstRegBase = X86_GREG_xSI; break;
12390 case 7: idxGstRegBase = X86_GREG_xDI; break;
12391 }
12392
12393 /*
12394 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
12395 * the start of the function.
12396 */
12397 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
12398 {
12399 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
12400 return off;
12401 }
12402
12403 /*
12404 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12405 */
12406 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12407 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
12408 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12409 kIemNativeGstRegUse_ReadOnly);
12410 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
12411 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12412 kIemNativeGstRegUse_ReadOnly);
12413
12414 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
12415 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
12416 {
12417 idxRegBase = idxRegIndex;
12418 idxRegIndex = UINT8_MAX;
12419 }
12420
12421#ifdef RT_ARCH_AMD64
12422 if (idxRegIndex == UINT8_MAX)
12423 {
12424 if (u32EffAddr == 0)
12425 {
12426 /* mov ret, base */
12427 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12428 }
12429 else
12430 {
12431 /* lea ret32, [base64 + disp32] */
12432 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12433 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12434 if (idxRegRet >= 8 || idxRegBase >= 8)
12435 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
12436 pbCodeBuf[off++] = 0x8d;
12437 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12438 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12439 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
12440 else
12441 {
12442 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12443 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12444 }
12445 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12446 if (bMod == X86_MOD_MEM4)
12447 {
12448 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12449 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12450 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12451 }
12452 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12453 }
12454 }
12455 else
12456 {
12457 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12458 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12459 if (idxRegBase == UINT8_MAX)
12460 {
12461 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
12462 if (idxRegRet >= 8 || idxRegIndex >= 8)
12463 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12464 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12465 pbCodeBuf[off++] = 0x8d;
12466 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
12467 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
12468 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12469 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12470 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12471 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12472 }
12473 else
12474 {
12475 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
12476 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12477 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12478 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12479 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12480 pbCodeBuf[off++] = 0x8d;
12481 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
12482 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12483 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12484 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
12485 if (bMod != X86_MOD_MEM0)
12486 {
12487 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12488 if (bMod == X86_MOD_MEM4)
12489 {
12490 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12491 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12492 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12493 }
12494 }
12495 }
12496 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12497 }
12498
12499#elif defined(RT_ARCH_ARM64)
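 /* ARM64: all arithmetic is done with 32-bit (W register) operations so the
    result wraps at 4 GiB as a 32-bit effective address must. */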
12500 if (u32EffAddr == 0)
12501 {
12502 if (idxRegIndex == UINT8_MAX)
12503 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12504 else if (idxRegBase == UINT8_MAX)
12505 {
12506 if (cShiftIndex == 0)
12507 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
12508 else
12509 {
12510 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12511 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
12512 }
12513 }
12514 else
12515 {
12516 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12517 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
12518 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
12519 }
12520 }
12521 else
12522 {
12523 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
12524 {
12525 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12526 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
12527 }
12528 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
12529 {
12530 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12531 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
12532 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
12533 }
12534 else
12535 {
12536 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
12537 if (idxRegBase != UINT8_MAX)
12538 {
12539 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12540 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
12541 }
12542 }
12543 if (idxRegIndex != UINT8_MAX)
12544 {
12545 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12546 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
12547 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
12548 }
12549 }
12550
12551#else
12552# error "port me"
12553#endif
12554
12555 if (idxRegIndex != UINT8_MAX)
12556 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12557 if (idxRegBase != UINT8_MAX)
12558 iemNativeRegFreeTmp(pReNative, idxRegBase);
12559 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12560 return off;
12561}
12562
12563
12564#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12565 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12566 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
12567
12568#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12569 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12570 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
12571
12572#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12573 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12574 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
12575
12576/**
12577 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
12578 *
12579 * @returns New off.
12580 * @param pReNative The native recompile state.
12581 * @param off The current code buffer offset.
12582 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
12583 * bit 4 to REX.X. The two bits are part of the
12584 * REG sub-field, which isn't needed in this
12585 * function.
12586 * @param uSibAndRspOffset Two parts:
12587 * - The first 8 bits make up the SIB byte.
12588 * - The next 8 bits are the fixed RSP/ESP offset
12589 * in case of a pop [xSP].
12590 * @param u32Disp The displacement byte/word/dword, if any.
12591 * @param cbInstr The size of the fully decoded instruction. Used
12592 * for RIP relative addressing.
12593 * @param idxVarRet The result variable number.
12594 * @param f64Bit Whether to use a 64-bit or 32-bit address size
12595 * when calculating the address.
12596 *
12597 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
12598 */
12599DECL_INLINE_THROW(uint32_t)
12600iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
12601 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
12602{
12603 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12604
12605 /*
12606 * Special case the rip + disp32 form first (mod=0 && rm=5 is RIP-relative in 64-bit code).
12607 */
12608 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
12609 {
12610#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
12611 /* Need to take the current PC offset into account for the displacement. No need to flush here,
12612 * as the PC is only read and no branching or helper calls are involved. */
12613 u32Disp += pReNative->Core.offPc;
12614#endif
12615
12616 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12617 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
12618 kIemNativeGstRegUse_ReadOnly);
12619#ifdef RT_ARCH_AMD64
12620 if (f64Bit)
12621 {
12622 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
12623 if ((int32_t)offFinalDisp == offFinalDisp)
12624 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
12625 else
12626 {
12627 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
12628 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
12629 }
12630 }
12631 else
12632 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
12633
12634#elif defined(RT_ARCH_ARM64)
12635 if (f64Bit)
12636 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
12637 (int64_t)(int32_t)u32Disp + cbInstr);
12638 else
12639 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
12640 (int32_t)u32Disp + cbInstr);
12641
12642#else
12643# error "Port me!"
12644#endif
12645 iemNativeRegFreeTmp(pReNative, idxRegPc);
12646 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12647 return off;
12648 }
12649
12650 /* Calculate the fixed displacement (the SIB.B=4 and SIB.B=5 cases below adjust this further). */
12651 int64_t i64EffAddr = 0;
12652 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12653 {
12654 case 0: break;
12655 case 1: i64EffAddr = (int8_t)u32Disp; break;
12656 case 2: i64EffAddr = (int32_t)u32Disp; break;
12657 default: AssertFailed();
12658 }
12659
12660 /* Get the register (or SIB) value. */
12661 uint8_t idxGstRegBase = UINT8_MAX;
12662 uint8_t idxGstRegIndex = UINT8_MAX;
12663 uint8_t cShiftIndex = 0;
12664 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
12665 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
12666 else /* SIB: */
12667 {
12668 /* index w/ scaling. */
12669 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
12670 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
12671 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
12672 if (idxGstRegIndex == 4)
12673 {
12674 /* no index */
12675 cShiftIndex = 0;
12676 idxGstRegIndex = UINT8_MAX;
12677 }
12678
12679 /* base */
12680 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
12681 if (idxGstRegBase == 4)
12682 {
12683 /* pop [rsp] hack */
12684 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
12685 }
12686 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
12687 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
12688 {
12689 /* mod=0 and base=5 -> disp32, no base reg. */
12690 Assert(i64EffAddr == 0);
12691 i64EffAddr = (int32_t)u32Disp;
12692 idxGstRegBase = UINT8_MAX;
12693 }
12694 }
12695
12696 /*
12697 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
12698 * the start of the function.
12699 */
12700 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
12701 {
12702 if (f64Bit)
12703 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
12704 else
12705 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
12706 return off;
12707 }
12708
12709 /*
12710 * Now emit code that calculates:
12711 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12712 * or if !f64Bit:
12713 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12714 */
12715 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12716 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
12717 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12718 kIemNativeGstRegUse_ReadOnly);
12719 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
12720 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12721 kIemNativeGstRegUse_ReadOnly);
12722
12723 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
12724 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
12725 {
12726 idxRegBase = idxRegIndex;
12727 idxRegIndex = UINT8_MAX;
12728 }
12729
12730#ifdef RT_ARCH_AMD64
12731 uint8_t bFinalAdj;
12732 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
12733 bFinalAdj = 0; /* likely */
12734 else
12735 {
12736 /* pop [rsp] with a problematic disp32 value. Split out the
12737 RSP offset and add it separately afterwards (bFinalAdj). */
12738 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
12739 Assert(idxGstRegBase == X86_GREG_xSP);
12740 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
12741 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
12742 Assert(bFinalAdj != 0);
12743 i64EffAddr -= bFinalAdj;
12744 Assert((int32_t)i64EffAddr == i64EffAddr);
12745 }
12746 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
12747//pReNative->pInstrBuf[off++] = 0xcc;
12748
12749 if (idxRegIndex == UINT8_MAX)
12750 {
12751 if (u32EffAddr == 0)
12752 {
12753 /* mov ret, base */
12754 if (f64Bit)
12755 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
12756 else
12757 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12758 }
12759 else
12760 {
12761 /* lea ret, [base + disp32] */
12762 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12763 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12764 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
12765 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12766 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12767 | (f64Bit ? X86_OP_REX_W : 0);
12768 pbCodeBuf[off++] = 0x8d;
12769 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12770 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12771 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
12772 else
12773 {
12774 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12775 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12776 }
12777 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12778 if (bMod == X86_MOD_MEM4)
12779 {
12780 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12781 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12782 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12783 }
12784 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12785 }
12786 }
12787 else
12788 {
12789 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12790 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12791 if (idxRegBase == UINT8_MAX)
12792 {
12793 /* lea ret, [(index64 << cShiftIndex) + disp32] */
12794 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
12795 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12796 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
12797 | (f64Bit ? X86_OP_REX_W : 0);
12798 pbCodeBuf[off++] = 0x8d;
12799 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
12800 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
12801 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12802 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12803 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12804 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12805 }
12806 else
12807 {
12808 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
12809 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12810 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12811 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12812 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
12813 | (f64Bit ? X86_OP_REX_W : 0);
12814 pbCodeBuf[off++] = 0x8d;
12815 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
12816 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12817 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12818 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
12819 if (bMod != X86_MOD_MEM0)
12820 {
12821 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12822 if (bMod == X86_MOD_MEM4)
12823 {
12824 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12825 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12826 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12827 }
12828 }
12829 }
12830 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12831 }
12832
12833 if (!bFinalAdj)
12834 { /* likely */ }
12835 else
12836 {
12837 Assert(f64Bit);
12838 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
12839 }
12840
12841#elif defined(RT_ARCH_ARM64)
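 /* ARM64: use an immediate ADD/SUB when the displacement fits in 12 bits,
    otherwise materialize it with a constant load and add the registers. */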
12842 if (i64EffAddr == 0)
12843 {
12844 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12845 if (idxRegIndex == UINT8_MAX)
12846 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
12847 else if (idxRegBase != UINT8_MAX)
12848 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
12849 f64Bit, false /*fSetFlags*/, cShiftIndex);
12850 else
12851 {
12852 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
12853 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
12854 }
12855 }
12856 else
12857 {
12858 if (f64Bit)
12859 { /* likely */ }
12860 else
12861 i64EffAddr = (int32_t)i64EffAddr;
12862
12863 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
12864 {
12865 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12866 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
12867 }
12868 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
12869 {
12870 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12871 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
12872 }
12873 else
12874 {
12875 if (f64Bit)
12876 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
12877 else
12878 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
12879 if (idxRegBase != UINT8_MAX)
12880 {
12881 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12882 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
12883 }
12884 }
12885 if (idxRegIndex != UINT8_MAX)
12886 {
12887 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12888 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
12889 f64Bit, false /*fSetFlags*/, cShiftIndex);
12890 }
12891 }
12892
12893#else
12894# error "port me"
12895#endif
12896
12897 if (idxRegIndex != UINT8_MAX)
12898 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12899 if (idxRegBase != UINT8_MAX)
12900 iemNativeRegFreeTmp(pReNative, idxRegBase);
12901 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12902 return off;
12903}
12904
12905
12906/*********************************************************************************************************************************
12907* TLB Lookup. *
12908*********************************************************************************************************************************/
12909
12910/**
12911 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
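 *
 * Debug aid: redoes the data TLB lookup in C and breaks into the debugger
 * when the result differs from what the emitted lookup code produced (uResult).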
12912 */
12913DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
12914{
12915 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
12916 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
12917 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
12918 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
12919
12920 /* Do the lookup manually. */
12921 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
12922 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
12923 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
12924 if (RT_LIKELY(pTlbe->uTag == uTag))
12925 {
12926 /*
12927 * Check TLB page table level access flags.
12928 */
12929 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
12930 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
12931 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
12932 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
12933 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
12934 | IEMTLBE_F_PG_UNASSIGNED
12935 | IEMTLBE_F_PT_NO_ACCESSED
12936 | fNoWriteNoDirty | fNoUser);
12937 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
12938 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
12939 {
12940 /*
12941 * Return the address.
12942 */
12943 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
12944 if ((uintptr_t)pbAddr == uResult)
12945 return;
12946 RT_NOREF(cbMem);
12947 AssertFailed();
12948 }
12949 else
12950 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
12951 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
12952 }
12953 else
12954 AssertFailed();
12955 RT_BREAKPOINT();
12956}
12957
12958/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
12959
12960
12961/*********************************************************************************************************************************
12962* Memory fetches and stores common *
12963*********************************************************************************************************************************/
12964
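/** Memory operation kind for iemNativeEmitMemFetchStoreDataCommon: a store, a
 * plain fetch, or a fetch with zero-/sign-extension to the indicated width. */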
12965typedef enum IEMNATIVEMITMEMOP
12966{
12967 kIemNativeEmitMemOp_Store = 0,
12968 kIemNativeEmitMemOp_Fetch,
12969 kIemNativeEmitMemOp_Fetch_Zx_U16,
12970 kIemNativeEmitMemOp_Fetch_Zx_U32,
12971 kIemNativeEmitMemOp_Fetch_Zx_U64,
12972 kIemNativeEmitMemOp_Fetch_Sx_U16,
12973 kIemNativeEmitMemOp_Fetch_Sx_U32,
12974 kIemNativeEmitMemOp_Fetch_Sx_U64
12975} IEMNATIVEMITMEMOP;
12976
12977/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
12978 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
12979 * (with iSegReg = UINT8_MAX). */
12980DECL_INLINE_THROW(uint32_t)
12981iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
12982 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
12983 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
12984{
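 /*
  * Layout: an inline TLB lookup bracketed by TlbLookup/TlbMiss/TlbDone labels.
  * The TLB-hit path does the access directly on the host mapping, while the
  * miss path calls pfnFunction (the C helper) after saving whatever the call
  * may clobber.
  */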
12985 /*
12986 * Assert sanity.
12987 */
12988 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
12989 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
12990 Assert( enmOp != kIemNativeEmitMemOp_Store
12991 || pVarValue->enmKind == kIemNativeVarKind_Immediate
12992 || pVarValue->enmKind == kIemNativeVarKind_Stack);
12993 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
12994 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
12995 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
12996 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
12997 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12998 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
12999 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
13000 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
13001#ifdef VBOX_STRICT
13002 if (iSegReg == UINT8_MAX)
13003 {
13004 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
13005 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
13006 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
13007 switch (cbMem)
13008 {
13009 case 1:
13010 Assert( pfnFunction
13011 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
13012 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
13013 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
13014 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
13015 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
13016 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
13017 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
13018 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
13019 : UINT64_C(0xc000b000a0009000) ));
13020 break;
13021 case 2:
13022 Assert( pfnFunction
13023 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
13024 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
13025 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
13026 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
13027 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
13028 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
13029 : UINT64_C(0xc000b000a0009000) ));
13030 break;
13031 case 4:
13032 Assert( pfnFunction
13033 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
13034 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
13035 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
13036 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
13037 : UINT64_C(0xc000b000a0009000) ));
13038 break;
13039 case 8:
13040 Assert( pfnFunction
13041 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
13042 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
13043 : UINT64_C(0xc000b000a0009000) ));
13044 break;
13045 }
13046 }
13047 else
13048 {
13049 Assert(iSegReg < 6);
13050 switch (cbMem)
13051 {
13052 case 1:
13053 Assert( pfnFunction
13054 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
13055 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
13056 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
13057 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
13058 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
13059 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
13060 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
13061 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
13062 : UINT64_C(0xc000b000a0009000) ));
13063 break;
13064 case 2:
13065 Assert( pfnFunction
13066 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
13067 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
13068 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
13069 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
13070 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
13071 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
13072 : UINT64_C(0xc000b000a0009000) ));
13073 break;
13074 case 4:
13075 Assert( pfnFunction
13076 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
13077 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
13078 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
13079 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
13080 : UINT64_C(0xc000b000a0009000) ));
13081 break;
13082 case 8:
13083 Assert( pfnFunction
13084 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
13085 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
13086 : UINT64_C(0xc000b000a0009000) ));
13087 break;
13088 }
13089 }
13090#endif
13091
13092#ifdef VBOX_STRICT
13093 /*
13094 * Check that the fExec flags we've got make sense.
13095 */
13096 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
13097#endif
13098
13099 /*
13100 * To keep things simple we have to commit any pending writes first as we
13101 * may end up making calls.
13102 */
13103 /** @todo we could postpone this till we make the call and reload the
13104 * registers after returning from the call. Not sure if that's sensible or
13105 * not, though. */
13106#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13107 off = iemNativeRegFlushPendingWrites(pReNative, off);
13108#else
13109 /* The program counter is treated differently for now. */
13110 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
13111#endif
13112
13113#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13114 /*
13115 * Move/spill/flush stuff out of call-volatile registers.
13116 * This is the easy way out. We could contain this to the tlb-miss branch
13117 * by saving and restoring active stuff here.
13118 */
13119 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
13120#endif
13121
13122 /*
13123 * Define labels and allocate the result register (trying for the return
13124 * register if we can).
13125 */
13126 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13127 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
13128 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
13129 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
13130 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
13131 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
13132 uint8_t const idxRegValueStore = !TlbState.fSkip
13133 && enmOp == kIemNativeEmitMemOp_Store
13134 && pVarValue->enmKind != kIemNativeVarKind_Immediate
13135 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
13136 : UINT8_MAX;
13137 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
13138 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13139 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13140 : UINT32_MAX;
13141
13142 /*
13143 * Jump to the TLB lookup code.
13144 */
13145 if (!TlbState.fSkip)
13146 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13147
13148 /*
13149 * TlbMiss:
13150 *
13151 * Call helper to do the fetching.
13152 * We flush all guest register shadow copies here.
13153 */
13154 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
13155
13156#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13157 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13158#else
13159 RT_NOREF(idxInstr);
13160#endif
13161
13162#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
13163 if (pReNative->Core.offPc)
13164 {
13165 /*
13166 * Update the program counter but restore it at the end of the TlbMiss branch.
13167 * This should allow delaying more program counter updates for the TlbLookup and hit paths
13168 * which are hopefully much more frequent, reducing the amount of memory accesses.
13169 */
13170 /* Allocate a temporary PC register. */
13171 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
13172
13173 /* Perform the addition and store the result. */
13174 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
13175 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
13176
13177 /* Free and flush the PC register. */
13178 iemNativeRegFreeTmp(pReNative, idxPcReg);
13179 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
13180 }
13181#endif
13182
13183#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13184 /* Save variables in volatile registers. */
13185 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
13186 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
13187 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
13188 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13189#endif
13190
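 /* Helper call argument layout: arg0=pVCpu, arg1=GCPtrMem (with offDisp added),
    arg2=iSegReg for segmented accesses or the value for flat stores, and
    arg3=the value for segmented stores. */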
13191 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
13192 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
13193 if (enmOp == kIemNativeEmitMemOp_Store)
13194 {
13195 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
13196 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
13197#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13198 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13199#else
13200 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
13201 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
13202#endif
13203 }
13204
13205 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
13206 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
13207#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13208 fVolGregMask);
13209#else
13210 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
13211#endif
13212
13213 if (iSegReg != UINT8_MAX)
13214 {
13215 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
13216 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
13217 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
13218 }
13219
13220 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13221 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13222
13223 /* Done setting up parameters, make the call. */
13224 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13225
13226 /*
13227 * Put the result in the right register if this is a fetch.
13228 */
13229 if (enmOp != kIemNativeEmitMemOp_Store)
13230 {
13231 Assert(idxRegValueFetch == pVarValue->idxReg);
13232 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
13233 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
13234 }
13235
13236#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13237 /* Restore variables and guest shadow registers to volatile registers. */
13238 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13239 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13240#endif
13241
13242#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
13243 if (pReNative->Core.offPc)
13244 {
13245 /*
13246 * Time to restore the program counter to its original value.
13247 */
13248 /* Allocate a temporary PC register. */
13249 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
13250
13251 /* Restore the original value. */
13252 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
13253 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
13254
13255 /* Free and flush the PC register. */
13256 iemNativeRegFreeTmp(pReNative, idxPcReg);
13257 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
13258 }
13259#endif
13260
13261#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13262 if (!TlbState.fSkip)
13263 {
13264 /* end of TlbMiss - Jump to the done label. */
13265 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13266 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13267
13268 /*
13269 * TlbLookup:
13270 */
13271 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
13272 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
13273 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
13274
13275 /*
13276 * Emit code to do the actual storing / fetching.
13277 */
13278 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
13279# ifdef VBOX_WITH_STATISTICS
13280 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
13281 enmOp == kIemNativeEmitMemOp_Store
13282 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
13283 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
13284# endif
13285 switch (enmOp)
13286 {
13287 case kIemNativeEmitMemOp_Store:
13288 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
13289 {
13290 switch (cbMem)
13291 {
13292 case 1:
13293 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13294 break;
13295 case 2:
13296 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13297 break;
13298 case 4:
13299 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13300 break;
13301 case 8:
13302 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13303 break;
13304 default:
13305 AssertFailed();
13306 }
13307 }
13308 else
13309 {
13310 switch (cbMem)
13311 {
13312 case 1:
13313 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
13314 idxRegMemResult, TlbState.idxReg1);
13315 break;
13316 case 2:
13317 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
13318 idxRegMemResult, TlbState.idxReg1);
13319 break;
13320 case 4:
13321 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
13322 idxRegMemResult, TlbState.idxReg1);
13323 break;
13324 case 8:
13325 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
13326 idxRegMemResult, TlbState.idxReg1);
13327 break;
13328 default:
13329 AssertFailed();
13330 }
13331 }
13332 break;
13333
13334 case kIemNativeEmitMemOp_Fetch:
13335 case kIemNativeEmitMemOp_Fetch_Zx_U16:
13336 case kIemNativeEmitMemOp_Fetch_Zx_U32:
13337 case kIemNativeEmitMemOp_Fetch_Zx_U64:
13338 switch (cbMem)
13339 {
13340 case 1:
13341 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13342 break;
13343 case 2:
13344 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13345 break;
13346 case 4:
13347 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13348 break;
13349 case 8:
13350 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13351 break;
13352 default:
13353 AssertFailed();
13354 }
13355 break;
13356
13357 case kIemNativeEmitMemOp_Fetch_Sx_U16:
13358 Assert(cbMem == 1);
13359 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13360 break;
13361
13362 case kIemNativeEmitMemOp_Fetch_Sx_U32:
13363 Assert(cbMem == 1 || cbMem == 2);
13364 if (cbMem == 1)
13365 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13366 else
13367 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13368 break;
13369
13370 case kIemNativeEmitMemOp_Fetch_Sx_U64:
13371 switch (cbMem)
13372 {
13373 case 1:
13374 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13375 break;
13376 case 2:
13377 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13378 break;
13379 case 4:
13380 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13381 break;
13382 default:
13383 AssertFailed();
13384 }
13385 break;
13386
13387 default:
13388 AssertFailed();
13389 }
13390
13391 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
13392
13393 /*
13394 * TlbDone:
13395 */
13396 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
13397
13398 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
13399
13400# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13401 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
13402 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13403# endif
13404 }
13405#else
13406 RT_NOREF(fAlignMask, idxLabelTlbMiss);
13407#endif
13408
13409 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
13410 iemNativeVarRegisterRelease(pReNative, idxVarValue);
13411 return off;
13412}
13413
13414
13415
13416/*********************************************************************************************************************************
13417* Memory fetches (IEM_MEM_FETCH_XXX). *
13418*********************************************************************************************************************************/
13419
13420/* 8-bit segmented: */
13421#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
13422 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
13423 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
13424 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13425
13426#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13427 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13428 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
13429 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13430
13431#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13432 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13433 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13434 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13435
13436#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13437 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13438 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13439 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13440
13441#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13442 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13443 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
13444 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
13445
13446#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13447 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13448 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13449 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
13450
13451#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13452 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13453 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13454 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
13455
13456/* 16-bit segmented: */
13457#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13458 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13459 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13460 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13461
13462#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
13463 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13464 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13465 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
13466
13467#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13468 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13469 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13470 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13471
13472#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13473 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13474 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13475 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13476
13477#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13478 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13479 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13480 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
13481
13482#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13483 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13484 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13485 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
13486
13487
13488/* 32-bit segmented: */
13489#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13490 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13491 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13492 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
13493
13494#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
13495 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13496 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13497 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
13498
13499#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13500 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13501 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13502 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
13503
13504#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13505 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13506 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13507 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
13508
13509
13510/* 64-bit segmented: */
13511#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13512 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13513 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
13514 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
13515
13516
13517
13518/* 8-bit flat: */
13519#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
13520 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
13521 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
13522 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13523
13524#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
13525 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13526 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
13527 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13528
13529#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
13530 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13531 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13532 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13533
13534#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
13535 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13536 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13537 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13538
13539#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
13540 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13541 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
13542 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
13543
13544#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
13545 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13546 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13547 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
13548
13549#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
13550 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13551 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13552 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
13553
13554
13555/* 16-bit flat: */
13556#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
13557 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13558 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13559 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13560
13561#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
13562 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13563 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13564 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
13565
13566#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
13567 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13568 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13569 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13570
13571#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
13572 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13573 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13574 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13575
13576#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
13577 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13578 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13579 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
13580
13581#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
13582 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13583 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13584 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
13585
13586/* 32-bit flat: */
13587#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
13588 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13589 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13590 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
13591
13592#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
13593 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13594 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13595 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
13596
13597#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
13598 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13599 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13600 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
13601
13602#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
13603 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13604 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13605 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
13606
13607/* 64-bit flat: */
13608#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
13609 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13610 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
13611 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
13612
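/* The FLAT variants below pass UINT8_MAX as the segment register index and use the
   iemNativeHlpMemFlatFetchDataXxx helpers on the TLB-miss path; the common emitter
   presumably treats the guest address as linear in that case (no segment applied). */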
13613
13614
13615/*********************************************************************************************************************************
13616* Memory stores (IEM_MEM_STORE_XXX). *
13617*********************************************************************************************************************************/
13618
13619#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
13620 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
13621 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
13622 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
13623
13624#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
13625 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
13626 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
13627 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
13628
13629#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
13630 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
13631 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
13632 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
13633
13634#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
13635 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
13636 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
13637 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
13638
13639
13640#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
13641 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
13642 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
13643 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
13644
13645#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
13646 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
13647 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
13648 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
13649
13650#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
13651 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
13652 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
13653 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
13654
13655#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
13656 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
13657 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
13658 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
13659
13660
13661#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
13662 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13663 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
13664
13665#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
13666 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13667 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
13668
13669#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
13670 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13671 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
13672
13673#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
13674 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13675 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
13676
13677
13678#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
13679 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13680 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
13681
13682#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
13683 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13684 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
13685
13686#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
13687 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13688 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
13689
13690#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
13691 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13692 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
13693
13694/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
13695 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
13696DECL_INLINE_THROW(uint32_t)
13697iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
13698 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
13699{
13700 /*
13701 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
13702 * to do the grunt work.
13703 */
13704 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
13705 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
13706 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
13707 pfnFunction, idxInstr);
13708 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
13709 return off;
13710}
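/* Illustrative only: IEM_MC_STORE_MEM_U32_CONST(iSeg, GCPtrMem, 0) thus becomes a temporary
   const variable holding 0 that is fed through the regular kIemNativeEmitMemOp_Store path
   above and freed again once the store has been emitted. */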
13711
13712
13713
13714/*********************************************************************************************************************************
13715* Stack Accesses. *
13716*********************************************************************************************************************************/
13717/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
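/* Rough decoding of the packed parameter as used by the emitters below (see the
   RT_BYTE1/RT_BYTE2/RT_BYTE3 accesses): the first byte (RT_BYTE1) is the width of the
   pushed value in bits, the second (RT_BYTE2) is 0 for segmented stacks or 32/64 for the
   flat variants, and the third (RT_BYTE3) is non-zero when the value is a segment
   register.  E.g. RT_MAKE_U32_FROM_U8(16, 64, 0, 0) is a 16-bit push on a flat 64-bit stack. */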
13718#define IEM_MC_PUSH_U16(a_u16Value) \
13719 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
13720 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
13721#define IEM_MC_PUSH_U32(a_u32Value) \
13722 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
13723 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
13724#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
13725 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
13726 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
13727#define IEM_MC_PUSH_U64(a_u64Value) \
13728 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
13729 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
13730
13731#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
13732 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
13733 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
13734#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
13735 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
13736 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
13737#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
13738 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
13739 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
13740
13741#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
13742 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
13743 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
13744#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
13745 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
13746 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
13747
13748
13749DECL_FORCE_INLINE_THROW(uint32_t)
13750iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
13751{
13752 /* Use16BitSp: */
13753#ifdef RT_ARCH_AMD64
13754 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
13755 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
13756#else
13757 /* sub regeff, regrsp, #cbMem */
13758 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
13759 /* and regeff, regeff, #0xffff */
13760 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
13761 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
13762    /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp into idxRegRsp bits 15:0, keeping the other RSP bits as is. */
13763 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
13764#endif
13765 return off;
13766}
13767
13768
13769DECL_FORCE_INLINE(uint32_t)
13770iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
13771{
13772 /* Use32BitSp: */
13773 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
13774 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
13775 return off;
13776}
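/* The two helpers above are emitted on either side of the SS attribute (D bit) check in
   iemNativeEmitStackPush below: the 16-bit one subtracts cbMem from SP and masks the
   effective pointer to 16 bits (merging it back into RSP with BFI on arm64), while the
   32-bit one simply does a 32-bit subtract and copy. */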
13777
13778
13779/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
13780DECL_INLINE_THROW(uint32_t)
13781iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
13782 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
13783{
13784 /*
13785 * Assert sanity.
13786 */
13787 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
13788 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
13789#ifdef VBOX_STRICT
13790 if (RT_BYTE2(cBitsVarAndFlat) != 0)
13791 {
13792 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
13793 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
13794 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
13795 Assert( pfnFunction
13796 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
13797 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
13798 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
13799 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
13800 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
13801 : UINT64_C(0xc000b000a0009000) ));
13802 }
13803 else
13804 Assert( pfnFunction
13805 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
13806 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
13807 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
13808 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
13809 : UINT64_C(0xc000b000a0009000) ));
13810#endif
13811
13812#ifdef VBOX_STRICT
13813 /*
13814 * Check that the fExec flags we've got make sense.
13815 */
13816 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
13817#endif
13818
13819 /*
13820 * To keep things simple we have to commit any pending writes first as we
13821 * may end up making calls.
13822 */
13823 /** @todo we could postpone this till we make the call and reload the
13824 * registers after returning from the call. Not sure if that's sensible or
13825 * not, though. */
13826 off = iemNativeRegFlushPendingWrites(pReNative, off);
13827
13828 /*
13829 * First we calculate the new RSP and the effective stack pointer value.
13830 * For 64-bit mode and flat 32-bit these two are the same.
13831     * (Code structure is very similar to that of POP)
13832 */
13833 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
13834 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
13835 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
13836 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
13837 ? cbMem : sizeof(uint16_t);
13838 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
13839 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
13840 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
13841 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
13842 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
13843 if (cBitsFlat != 0)
13844 {
13845 Assert(idxRegEffSp == idxRegRsp);
13846 Assert(cBitsFlat == 32 || cBitsFlat == 64);
13847 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
13848 if (cBitsFlat == 64)
13849 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
13850 else
13851 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
13852 }
13853 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
13854 {
13855 Assert(idxRegEffSp != idxRegRsp);
13856 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
13857 kIemNativeGstRegUse_ReadOnly);
13858#ifdef RT_ARCH_AMD64
13859 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13860#else
13861 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
13862#endif
13863 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
13864 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
13865 offFixupJumpToUseOtherBitSp = off;
13866 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
13867 {
13868 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
13869 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13870 }
13871 else
13872 {
13873 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
13874 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13875 }
13876 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13877 }
13878 /* SpUpdateEnd: */
13879 uint32_t const offLabelSpUpdateEnd = off;
13880
13881 /*
13882 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
13883 * we're skipping lookup).
13884 */
13885 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
13886 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
13887 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13888 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
13889 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13890 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13891 : UINT32_MAX;
13892 uint8_t const idxRegValue = !TlbState.fSkip
13893 && pVarValue->enmKind != kIemNativeVarKind_Immediate
13894 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
13895 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
13896 : UINT8_MAX;
13897 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
13898
13899
13900 if (!TlbState.fSkip)
13901 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13902 else
13903 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
13904
13905 /*
13906 * Use16BitSp:
13907 */
13908 if (cBitsFlat == 0)
13909 {
13910#ifdef RT_ARCH_AMD64
13911 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13912#else
13913 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
13914#endif
13915 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
13916 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
13917 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13918 else
13919 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13920 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
13921 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13922 }
13923
13924 /*
13925 * TlbMiss:
13926 *
13927 * Call helper to do the pushing.
13928 */
13929 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
13930
13931#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13932 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13933#else
13934 RT_NOREF(idxInstr);
13935#endif
13936
13937 /* Save variables in volatile registers. */
13938 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
13939 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
13940 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
13941 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
13942 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13943
13944 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
13945 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
13946 {
13947 /* Swap them using ARG0 as temp register: */
13948 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
13949 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
13950 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
13951 }
13952 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
13953 {
13954 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
13955 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
13956 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13957
13958 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
13959 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
13960 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
13961 }
13962 else
13963 {
13964 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
13965 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
13966
13967 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
13968 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
13969 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
13970 }
13971
13972 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13973 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13974
13975 /* Done setting up parameters, make the call. */
13976 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13977
13978 /* Restore variables and guest shadow registers to volatile registers. */
13979 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13980 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13981
13982#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13983 if (!TlbState.fSkip)
13984 {
13985 /* end of TlbMiss - Jump to the done label. */
13986 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13987 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13988
13989 /*
13990 * TlbLookup:
13991 */
13992 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
13993 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
13994
13995 /*
13996 * Emit code to do the actual storing / fetching.
13997 */
13998 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
13999# ifdef VBOX_WITH_STATISTICS
14000 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
14001 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
14002# endif
14003 if (idxRegValue != UINT8_MAX)
14004 {
14005 switch (cbMemAccess)
14006 {
14007 case 2:
14008 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
14009 break;
14010 case 4:
14011 if (!fIsIntelSeg)
14012 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
14013 else
14014 {
14015                        /* intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
14016                           PUSH FS in real mode, so we have to try to emulate that here.
14017 We borrow the now unused idxReg1 from the TLB lookup code here. */
14018 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
14019 kIemNativeGstReg_EFlags);
14020 if (idxRegEfl != UINT8_MAX)
14021 {
14022#ifdef RT_ARCH_AMD64
14023 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
14024 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
14025 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
14026#else
14027 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
14028 off, TlbState.idxReg1, idxRegEfl,
14029 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
14030#endif
14031 iemNativeRegFreeTmp(pReNative, idxRegEfl);
14032 }
14033 else
14034 {
14035 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
14036 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
14037 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
14038 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
14039 }
14040 /* ASSUMES the upper half of idxRegValue is ZERO. */
14041 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
14042 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
14043 }
14044 break;
14045 case 8:
14046 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
14047 break;
14048 default:
14049 AssertFailed();
14050 }
14051 }
14052 else
14053 {
14054 switch (cbMemAccess)
14055 {
14056 case 2:
14057 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
14058 idxRegMemResult, TlbState.idxReg1);
14059 break;
14060 case 4:
14061 Assert(!fIsSegReg);
14062 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
14063 idxRegMemResult, TlbState.idxReg1);
14064 break;
14065 case 8:
14066 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
14067 break;
14068 default:
14069 AssertFailed();
14070 }
14071 }
14072
14073 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
14074 TlbState.freeRegsAndReleaseVars(pReNative);
14075
14076 /*
14077 * TlbDone:
14078 *
14079 * Commit the new RSP value.
14080 */
14081 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
14082 }
14083#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
14084
14085 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
14086 iemNativeRegFreeTmp(pReNative, idxRegRsp);
14087 if (idxRegEffSp != idxRegRsp)
14088 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
14089
14090    /* The value variable is implicitly flushed. */
14091 if (idxRegValue != UINT8_MAX)
14092 iemNativeVarRegisterRelease(pReNative, idxVarValue);
14093 iemNativeVarFreeLocal(pReNative, idxVarValue);
14094
14095 return off;
14096}
14097
14098
14099
14100/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
14101#define IEM_MC_POP_GREG_U16(a_iGReg) \
14102 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
14103 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
14104#define IEM_MC_POP_GREG_U32(a_iGReg) \
14105 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
14106 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
14107#define IEM_MC_POP_GREG_U64(a_iGReg) \
14108 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
14109 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
14110
14111#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
14112 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
14113 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
14114#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
14115 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
14116 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
14117
14118#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
14119 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
14120 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
14121#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
14122 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
14123 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
14124
14125
14126DECL_FORCE_INLINE_THROW(uint32_t)
14127iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
14128 uint8_t idxRegTmp)
14129{
14130 /* Use16BitSp: */
14131#ifdef RT_ARCH_AMD64
14132 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
14133 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
14134 RT_NOREF(idxRegTmp);
14135#else
14136 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
14137 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
14138 /* add tmp, regrsp, #cbMem */
14139 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
14140 /* and tmp, tmp, #0xffff */
14141 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
14142 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
14143    /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from idxRegTmp into RSP bits 15:0, keeping the other RSP bits as is. */
14144 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
14145#endif
14146 return off;
14147}
14148
14149
14150DECL_FORCE_INLINE(uint32_t)
14151iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
14152{
14153 /* Use32BitSp: */
14154 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
14155 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
14156 return off;
14157}
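/* Mirror image of the push helpers: for pop the old (effective) stack pointer is read
   first and RSP is only then incremented by cbMem, which is why the 16-bit variant needs
   the extra idxRegTmp on arm64 to build the wrapped 16-bit result before the BFI. */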
14158
14159
14160/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
14161DECL_INLINE_THROW(uint32_t)
14162iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
14163 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
14164{
14165 /*
14166 * Assert sanity.
14167 */
14168 Assert(idxGReg < 16);
14169#ifdef VBOX_STRICT
14170 if (RT_BYTE2(cBitsVarAndFlat) != 0)
14171 {
14172 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
14173 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
14174 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
14175 Assert( pfnFunction
14176 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
14177 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
14178 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
14179 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
14180 : UINT64_C(0xc000b000a0009000) ));
14181 }
14182 else
14183 Assert( pfnFunction
14184 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
14185 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
14186 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
14187 : UINT64_C(0xc000b000a0009000) ));
14188#endif
14189
14190#ifdef VBOX_STRICT
14191 /*
14192 * Check that the fExec flags we've got make sense.
14193 */
14194 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
14195#endif
14196
14197 /*
14198 * To keep things simple we have to commit any pending writes first as we
14199 * may end up making calls.
14200 */
14201 off = iemNativeRegFlushPendingWrites(pReNative, off);
14202
14203 /*
14204     * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
14205 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
14206 * directly as the effective stack pointer.
14207 * (Code structure is very similar to that of PUSH)
14208 */
14209 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
14210 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
14211 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
14212 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
14213 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
14214 /** @todo can do a better job picking the register here. For cbMem >= 4 this
14215 * will be the resulting register value. */
14216 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
14217
14218 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
14219 if (cBitsFlat != 0)
14220 {
14221 Assert(idxRegEffSp == idxRegRsp);
14222 Assert(cBitsFlat == 32 || cBitsFlat == 64);
14223 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
14224 }
14225 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
14226 {
14227 Assert(idxRegEffSp != idxRegRsp);
14228 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
14229 kIemNativeGstRegUse_ReadOnly);
14230#ifdef RT_ARCH_AMD64
14231 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14232#else
14233 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
14234#endif
14235 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
14236 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
14237 offFixupJumpToUseOtherBitSp = off;
14238 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
14239 {
14240/** @todo can skip idxRegRsp updating when popping ESP. */
14241 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
14242 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
14243 }
14244 else
14245 {
14246 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
14247 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
14248 }
14249 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
14250 }
14251 /* SpUpdateEnd: */
14252 uint32_t const offLabelSpUpdateEnd = off;
14253
14254 /*
14255 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
14256 * we're skipping lookup).
14257 */
14258 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
14259 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
14260 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
14261 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
14262 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
14263 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
14264 : UINT32_MAX;
14265
14266 if (!TlbState.fSkip)
14267 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
14268 else
14269 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
14270
14271 /*
14272 * Use16BitSp:
14273 */
14274 if (cBitsFlat == 0)
14275 {
14276#ifdef RT_ARCH_AMD64
14277 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14278#else
14279 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
14280#endif
14281 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
14282 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
14283 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
14284 else
14285 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
14286 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
14287 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
14288 }
14289
14290 /*
14291 * TlbMiss:
14292 *
14293     * Call helper to do the popping.
14294 */
14295 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
14296
14297#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
14298 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
14299#else
14300 RT_NOREF(idxInstr);
14301#endif
14302
14303 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
14304 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
14305 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
14306 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
14307
14308
14309 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
14310 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
14311 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
14312
14313 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
14314 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
14315
14316 /* Done setting up parameters, make the call. */
14317 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
14318
14319 /* Move the return register content to idxRegMemResult. */
14320 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
14321 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
14322
14323 /* Restore variables and guest shadow registers to volatile registers. */
14324 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
14325 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
14326
14327#ifdef IEMNATIVE_WITH_TLB_LOOKUP
14328 if (!TlbState.fSkip)
14329 {
14330 /* end of TlbMiss - Jump to the done label. */
14331 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
14332 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
14333
14334 /*
14335 * TlbLookup:
14336 */
14337 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
14338 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
14339
14340 /*
14341         * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult itself).
14342 */
14343 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14344# ifdef VBOX_WITH_STATISTICS
14345 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
14346 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
14347# endif
14348 switch (cbMem)
14349 {
14350 case 2:
14351 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14352 break;
14353 case 4:
14354 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14355 break;
14356 case 8:
14357 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14358 break;
14359 default:
14360 AssertFailed();
14361 }
14362
14363 TlbState.freeRegsAndReleaseVars(pReNative);
14364
14365 /*
14366 * TlbDone:
14367 *
14368 * Set the new RSP value (FLAT accesses needs to calculate it first) and
14369 * commit the popped register value.
14370 */
14371 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
14372 }
14373#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
14374
14375 if (idxGReg != X86_GREG_xSP)
14376 {
14377 /* Set the register. */
14378 if (cbMem >= sizeof(uint32_t))
14379 {
14380#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
14381 AssertMsg( pReNative->idxCurCall == 0
14382 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
14383 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
14384#endif
14385 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
14386 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
14387 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
14388 }
14389 else
14390 {
14391 Assert(cbMem == sizeof(uint16_t));
14392 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
14393 kIemNativeGstRegUse_ForUpdate);
14394 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
14395 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
14396 iemNativeRegFreeTmp(pReNative, idxRegDst);
14397 }
14398
14399 /* Complete RSP calculation for FLAT mode. */
14400 if (idxRegEffSp == idxRegRsp)
14401 {
14402 if (cBitsFlat == 64)
14403 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
14404 else
14405 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
14406 }
14407 }
14408 else
14409 {
14410        /* We're popping RSP, ESP or SP. Only the 16-bit (SP) case needs a bit of extra work, of course. */
14411 if (cbMem == sizeof(uint64_t))
14412 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
14413 else if (cbMem == sizeof(uint32_t))
14414 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
14415 else
14416 {
14417 if (idxRegEffSp == idxRegRsp)
14418 {
14419 if (cBitsFlat == 64)
14420 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
14421 else
14422 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
14423 }
14424 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
14425 }
14426 }
14427 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
14428
14429 iemNativeRegFreeTmp(pReNative, idxRegRsp);
14430 if (idxRegEffSp != idxRegRsp)
14431 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
14432 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
14433
14434 return off;
14435}
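/* Note on the xSP case above: when popping into RSP/ESP the loaded value simply replaces
   the stack pointer, so the usual FLAT-mode post-pop increment is skipped; only the
   16-bit SP case keeps the increment (FLAT mode) and merges the popped 16 bits into RSP. */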
14436
14437
14438
14439/*********************************************************************************************************************************
14440* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
14441*********************************************************************************************************************************/
14442
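/* The mapping macros below all go through iemNativeEmitMemMapCommon, differing only in
   element size, access mode (ATOMIC/RW/WO/RO), alignment mask and the TLB-miss helper;
   the FLAT variants again pass UINT8_MAX as the segment register index. */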
14443#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14444 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14445 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
14446 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
14447
14448#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14449 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14450 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
14451 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
14452
14453#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14454 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14455 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
14456 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
14457
14458#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14459 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14460 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
14461 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
14462
14463
14464#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14465 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14466 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14467 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
14468
14469#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14470 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14471 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14472 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
14473
14474#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14475 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14476 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14477 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
14478
14479#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14480 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14481 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14482 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
14483
14484#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14485 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
14486 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14487 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
14488
14489
14490#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14491 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14492 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14493 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
14494
14495#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14496 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14497 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14498 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
14499
14500#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14501 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14502 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14503 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
14504
14505#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14506 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14507 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14508 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
14509
14510#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14511 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
14512 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14513 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
14514
14515
14516#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14517 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14518 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14519 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
14520
14521#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14522 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14523 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14524 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
14525#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14526 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14527 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14528 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
14529
14530#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14531 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14532 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14533 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
14534
14535#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14536 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
14537 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14538 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
14539
14540
14541#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14542 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
14543 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14544 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
14545
14546#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14547 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
14548 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
14549 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
14550
14551
14552#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14553 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14554 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14555 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
14556
14557#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14558 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14559 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14560 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
14561
14562#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14563 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14564 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14565 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
14566
14567#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14568 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14569 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14570 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
14571
14572
14573
14574#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14575 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14576 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
14577 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
14578
14579#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14580 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14581 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
14582 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
14583
14584#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14585 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14586 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
14587 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
14588
14589#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14590 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14591 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
14592 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
14593
14594
14595#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14596 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14597 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14598 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
14599
14600#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14601 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14602 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14603 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
14604
14605#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14606 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14607 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14608 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
14609
14610#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14611 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14612 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14613 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
14614
14615#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
14616 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
14617 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14618 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
14619
14620
14621#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14622 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14623 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14624 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
14625
14626#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14627 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14628 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14629 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
14630
14631#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14632 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14633 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14634 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
14635
14636#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14637 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14638 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14639 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
14640
14641#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
14642 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
14643 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14644 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
14645
14646
14647#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14648 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14649 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14650 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
14651
14652#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14653 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14654 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14655 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
14656
14657#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14658 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14659 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14660 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
14661
14662#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14663 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14664 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14665 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
14666
14667#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
14668 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
14669 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14670 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
14671
14672
14673#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
14674 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
14675 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14676 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
14677
14678#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
14679 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
14680 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
14681 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
14682
14683
14684#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14685 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14686 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14687 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
14688
14689#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14690 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14691 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14692 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
14693
14694#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14695 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14696 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14697 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
14698
14699#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14700 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14701 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14702 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
14703
14704
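/**
 * Common emitter backing all of the IEM_MC_MEM_MAP_XXX and
 * IEM_MC_MEM_FLAT_MAP_XXX statements above.
 *
 * Rough illustration of what one of those wrappers boils down to, taken
 * straight from the IEM_MC_MEM_MAP_U32_RW definition above (pu32Dst,
 * bUnmapInfo, iSeg and GCPtrEff are just placeholder names for whatever the
 * MC block supplies):
 *
 *      off = iemNativeEmitMemMapCommon(pReNative, off, pu32Dst, bUnmapInfo, iSeg, GCPtrEff,
 *                                      sizeof(uint32_t), IEM_ACCESS_DATA_RW,
 *                                      sizeof(uint32_t) - 1, (uintptr_t)iemNativeHlpMemMapDataU32Rw,
 *                                      pCallEntry->idxInstr);
 *
 * The alignment mask is the natural one (cbMem - 1) for the integer sizes,
 * and the FLAT variants pass UINT8_MAX for the segment register.
 */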
14705DECL_INLINE_THROW(uint32_t)
14706iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
14707 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
14708 uintptr_t pfnFunction, uint8_t idxInstr)
14709{
14710 /*
14711 * Assert sanity.
14712 */
14713 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
14714 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
14715 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
14716 && pVarMem->cbVar == sizeof(void *),
14717 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14718
14719    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
14720    PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
14721 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
14722 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
14723 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14724
14725    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
14726    PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
14727 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
14728 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
14729 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14730
14731 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
14732
14733 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
14734
14735#ifdef VBOX_STRICT
14736# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
14737 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
14738 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
14739 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
14740 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
14741# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
14742 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
14743 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
14744 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
14745
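    /* For illustration: with fAccess = IEM_ACCESS_DATA_RW (read + write, not atomic),
       IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32) should resolve to
       (uintptr_t)iemNativeHlpMemMapDataU32Rw, while IEM_ACCESS_DATA_ATOMIC picks the
       ...U32Atomic variant - which is exactly what the asserts below verify. */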
14746 if (iSegReg == UINT8_MAX)
14747 {
14748 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
14749 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
14750 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
14751 switch (cbMem)
14752 {
14753 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
14754 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
14755 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
14756 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
14757 case 10:
14758 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
14759 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
14760 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
14761 break;
14762 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
14763# if 0
14764 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
14765 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
14766# endif
14767 default: AssertFailed(); break;
14768 }
14769 }
14770 else
14771 {
14772 Assert(iSegReg < 6);
14773 switch (cbMem)
14774 {
14775 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
14776 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
14777 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
14778 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
14779 case 10:
14780 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
14781 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
14782 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
14783 break;
14784 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
14785# if 0
14786 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
14787 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
14788# endif
14789 default: AssertFailed(); break;
14790 }
14791 }
14792# undef IEM_MAP_HLP_FN
14793# undef IEM_MAP_HLP_FN_NO_AT
14794#endif
14795
14796#ifdef VBOX_STRICT
14797 /*
14798 * Check that the fExec flags we've got make sense.
14799 */
14800 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
14801#endif
14802
14803 /*
14804 * To keep things simple we have to commit any pending writes first as we
14805 * may end up making calls.
14806 */
14807 off = iemNativeRegFlushPendingWrites(pReNative, off);
14808
14809#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14810 /*
14811 * Move/spill/flush stuff out of call-volatile registers.
14812 * This is the easy way out. We could contain this to the tlb-miss branch
14813 * by saving and restoring active stuff here.
14814 */
14815 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
14816 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
14817#endif
14818
14819 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
14820 while the tlb-miss codepath will temporarily put it on the stack.
14821       Set the type to stack here so we don't need to do it twice below. */
14822 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
14823 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
14824 /** @todo use a tmp register from TlbState, since they'll be free after tlb
14825 * lookup is done. */
14826
14827 /*
14828 * Define labels and allocate the result register (trying for the return
14829 * register if we can).
14830 */
14831 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
14832 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
14833 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
14834 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
14835 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
14836 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
14837 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
14838 : UINT32_MAX;
14839//off=iemNativeEmitBrk(pReNative, off, 0);
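    /*
     * Rough layout of the code emitted below (illustrative only):
     *          jmp     TlbLookup           ; skipped when TlbState.fSkip
     *      TlbMiss:
     *          ...                         ; set up arguments, call pfnFunction, fetch results
     *          jmp     TlbDone
     *      TlbLookup:
     *          ...                         ; inline TLB probe, branches to TlbMiss on miss
     *          mov     idxRegUnmapInfo, 0  ; a TLB hit needs no unmapping work
     *      TlbDone:
     */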
14840 /*
14841 * Jump to the TLB lookup code.
14842 */
14843 if (!TlbState.fSkip)
14844 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
14845
14846 /*
14847 * TlbMiss:
14848 *
14849 * Call helper to do the fetching.
14850 * We flush all guest register shadow copies here.
14851 */
14852 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
14853
14854#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
14855 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
14856#else
14857 RT_NOREF(idxInstr);
14858#endif
14859
14860#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14861 /* Save variables in volatile registers. */
14862 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
14863 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
14864#endif
14865
14866 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
14867 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
14868#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14869 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
14870#else
14871 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
14872#endif
14873
14874 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
14875 if (iSegReg != UINT8_MAX)
14876 {
14877 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
14878 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
14879 }
14880
14881 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
14882 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
14883 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
14884
14885 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
14886 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
14887
14888 /* Done setting up parameters, make the call. */
14889 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
14890
14891 /*
14892 * Put the output in the right registers.
14893 */
14894 Assert(idxRegMemResult == pVarMem->idxReg);
14895 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
14896 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
14897
14898#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14899 /* Restore variables and guest shadow registers to volatile registers. */
14900 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
14901 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
14902#endif
14903
14904 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
14905 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
14906
14907#ifdef IEMNATIVE_WITH_TLB_LOOKUP
14908 if (!TlbState.fSkip)
14909 {
14910        /* End of TlbMiss - jump to the done label. */
14911 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
14912 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
14913
14914 /*
14915 * TlbLookup:
14916 */
14917 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
14918 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
14919# ifdef VBOX_WITH_STATISTICS
14920 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
14921 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
14922# endif
14923
14924 /* [idxVarUnmapInfo] = 0; */
14925 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
14926
14927 /*
14928 * TlbDone:
14929 */
14930 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
14931
14932 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
14933
14934# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14935 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
14936 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
14937# endif
14938 }
14939#else
14940 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
14941#endif
14942
14943 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
14944 iemNativeVarRegisterRelease(pReNative, idxVarMem);
14945
14946 return off;
14947}
14948
14949
14950#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
14951 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
14952 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
14953
14954#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
14955 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
14956 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
14957
14958#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
14959 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
14960 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
14961
14962#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
14963 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
14964 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
14965
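/**
 * Common emitter for the IEM_MC_MEM_COMMIT_AND_UNMAP_XXX statements above.
 *
 * Illustrative MC pairing this is meant for (the names are placeholders; the
 * mapping statement fills in bUnmapInfo, which this statement then consumes):
 *
 *      IEM_MC_MEM_MAP_U32_RW(pu32Dst, bUnmapInfo, iSeg, GCPtrEff);
 *      ... modify the mapped dword ...
 *      IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
 */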
14966DECL_INLINE_THROW(uint32_t)
14967iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
14968 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
14969{
14970 /*
14971 * Assert sanity.
14972 */
14973 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
14974#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
14975 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
14976#endif
14977 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
14978 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
14979 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
14980#ifdef VBOX_STRICT
14981 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
14982 {
14983 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
14984 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
14985 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
14986 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
14987 case IEM_ACCESS_TYPE_WRITE:
14988 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
14989 case IEM_ACCESS_TYPE_READ:
14990 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
14991 default: AssertFailed();
14992 }
14993#else
14994 RT_NOREF(fAccess);
14995#endif
14996
14997 /*
14998 * To keep things simple we have to commit any pending writes first as we
14999 * may end up making calls (there shouldn't be any at this point, so this
15000 * is just for consistency).
15001 */
15002 /** @todo we could postpone this till we make the call and reload the
15003 * registers after returning from the call. Not sure if that's sensible or
15004 * not, though. */
15005 off = iemNativeRegFlushPendingWrites(pReNative, off);
15006
15007 /*
15008 * Move/spill/flush stuff out of call-volatile registers.
15009 *
15010 * We exclude any register holding the bUnmapInfo variable, as we'll be
15011 * checking it after returning from the call and will free it afterwards.
15012 */
15013 /** @todo save+restore active registers and maybe guest shadows in miss
15014 * scenario. */
15015 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
15016
15017 /*
15018 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
15019 * to call the unmap helper function.
15020 *
15021     * The likelihood of it being zero is higher than for the TLB hit when doing
15022     * the mapping, as a TLB miss for a well aligned and unproblematic memory
15023     * access should also end up with a mapping that won't need special unmapping.
15024 */
15025 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
15026 * should speed up things for the pure interpreter as well when TLBs
15027 * are enabled. */
15028#ifdef RT_ARCH_AMD64
15029 if (pVarUnmapInfo->idxReg == UINT8_MAX)
15030 {
15031 /* test byte [rbp - xxx], 0ffh */
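        /* Encoding note (for reference): 0xf6 /0 ib is TEST r/m8, imm8. The 0 passed to
           iemNativeEmitGprByBpDisp below goes into the ModRM reg field (the /0 part), the
           helper emits the RBP-relative mod/rm and displacement bytes, and 0xff is the
           immediate byte. */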
15032 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
15033 pbCodeBuf[off++] = 0xf6;
15034 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
15035 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
15036 pbCodeBuf[off++] = 0xff;
15037 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
15038 }
15039 else
15040#endif
15041 {
15042 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
15043 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
15044 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
15045 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
15046 }
15047 uint32_t const offJmpFixup = off;
15048    off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
15049
15050 /*
15051 * Call the unmap helper function.
15052 */
15053#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
15054 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
15055#else
15056 RT_NOREF(idxInstr);
15057#endif
15058
15059 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
15060 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
15061 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
15062
15063 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
15064 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
15065
15066 /* Done setting up parameters, make the call. */
15067 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
15068
15069    /* The bUnmapInfo variable is implicitly freed by these MCs. */
15070 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
15071
15072 /*
15073 * Done, just fixup the jump for the non-call case.
15074 */
15075 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
15076
15077 return off;
15078}
15079
15080
15081
15082/*********************************************************************************************************************************
15083* State and Exceptions *
15084*********************************************************************************************************************************/
15085
15086#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15087#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
15088
15089#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15090#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15091#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
15092
15093#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15094#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15095#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
15096
15097
15098DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
15099{
15100 /** @todo this needs a lot more work later. */
15101 RT_NOREF(pReNative, fForChange);
15102 return off;
15103}
15104
15105
15106
15107/*********************************************************************************************************************************
15108* Emitters for FPU related operations. *
15109*********************************************************************************************************************************/
15110
15111#define IEM_MC_FETCH_FCW(a_u16Fcw) \
15112 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
15113
15114/** Emits code for IEM_MC_FETCH_FCW. */
15115DECL_INLINE_THROW(uint32_t)
15116iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
15117{
15118 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15119 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
15120
15121 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
15122
15123 /* Allocate a temporary FCW register. */
15124 /** @todo eliminate extra register */
15125 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
15126 kIemNativeGstRegUse_ReadOnly);
15127
15128 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
15129
15130 /* Free but don't flush the FCW register. */
15131 iemNativeRegFreeTmp(pReNative, idxFcwReg);
15132 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15133
15134 return off;
15135}
15136
15137
15138#define IEM_MC_FETCH_FSW(a_u16Fsw) \
15139 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
15140
15141/** Emits code for IEM_MC_FETCH_FSW. */
15142DECL_INLINE_THROW(uint32_t)
15143iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
15144{
15145 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15146 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
15147
15148 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
15149 /* Allocate a temporary FSW register. */
15150 /** @todo eliminate extra register */
15151 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
15152 kIemNativeGstRegUse_ReadOnly);
15153
15154 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
15155
15156 /* Free but don't flush the FSW register. */
15157 iemNativeRegFreeTmp(pReNative, idxFswReg);
15158 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15159
15160 return off;
15161}
15162
15163
15164
15165#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15166
15167
15168/*********************************************************************************************************************************
15169* Emitters for SSE/AVX specific operations. *
15170*********************************************************************************************************************************/
15171
15172#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
15173 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
15174
15175/** Emits code for IEM_MC_COPY_XREG_U128. */
15176DECL_INLINE_THROW(uint32_t)
15177iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
15178{
15179 /* Allocate destination and source register. */
15180 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
15181 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
15182 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
15183 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
15184
15185 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
15186 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXRegDst);
15187 /* We don't need to write everything back here as the destination is marked as dirty and will be flushed automatically. */
15188
15189 /* Free but don't flush the source and destination register. */
15190 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
15191 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
15192
15193 return off;
15194}
15195
15196
15197#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
15198 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
15199
15200/** Emits code for IEM_MC_FETCH_XREG_U64. */
15201DECL_INLINE_THROW(uint32_t)
15202iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
15203{
15204 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15205 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
15206
15207 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
15208 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
15209
15210 iemNativeVarSetKindToStack(pReNative, idxDstVar);
15211 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
15212
15213 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
15214
15215 /* Free but don't flush the source register. */
15216 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
15217 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15218
15219 return off;
15220}
15221
15222
15223#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
15224    off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
15225
15226/** Emits code for IEM_MC_FETCH_XREG_U32. */
15227DECL_INLINE_THROW(uint32_t)
15228iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
15229{
15230 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15231 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
15232
15233 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
15234 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
15235
15236 iemNativeVarSetKindToStack(pReNative, idxDstVar);
15237 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
15238
15239 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
15240
15241 /* Free but don't flush the source register. */
15242 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
15243 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15244
15245 return off;
15246}
15247
15248
15249#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
15250 off = iemNativeEmitSimdStoreXregU64(pReNative, off, a_iXReg, a_u64Value, a_iQWord)
15251
15252/** Emits code for IEM_MC_STORE_XREG_U64. */
15253DECL_INLINE_THROW(uint32_t)
15254iemNativeEmitSimdStoreXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar, uint8_t iQWord)
15255{
15256 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15257 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
15258
15259 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
15260 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
15261
15262 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
15263
15264 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQWord);
15265 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
15266
15267 /* Free but don't flush the source register. */
15268 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
15269 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15270
15271 return off;
15272}
15273
15274
15275#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
15276 off = iemNativeEmitSimdStoreXregU32(pReNative, off, a_iXReg, a_u32Value, a_iDWord)
15277
15278/** Emits code for IEM_MC_STORE_XREG_U32. */
15279DECL_INLINE_THROW(uint32_t)
15280iemNativeEmitSimdStoreXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar, uint8_t iDWord)
15281{
15282 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15283 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
15284
15285 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
15286 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
15287
15288 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
15289
15290 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iDWord);
15291 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
15292
15293 /* Free but don't flush the source register. */
15294 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
15295 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15296
15297 return off;
15298}
15299
15300
15301#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc) \
15302 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, 0)
15303
15304/** Emits code for IEM_MC_FETCH_YREG_U64. */
15305DECL_INLINE_THROW(uint32_t)
15306iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
15307{
15308 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15309 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
15310
15311 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
15312 iQWord >= 2
15313 ? kIemNativeGstSimdRegLdStSz_High128
15314 : kIemNativeGstSimdRegLdStSz_Low128,
15315 kIemNativeGstRegUse_ReadOnly);
15316
15317 iemNativeVarSetKindToStack(pReNative, idxDstVar);
15318 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
15319
15320 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
15321
15322 /* Free but don't flush the source register. */
15323 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
15324 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15325
15326 return off;
15327}
15328
15329
15330#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
15331 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
15332
15333/** Emits code for IEM_MC_FETCH_YREG_U32. */
15334DECL_INLINE_THROW(uint32_t)
15335iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
15336{
15337 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15338 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
15339
15340 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
15341 iDWord >= 4
15342 ? kIemNativeGstSimdRegLdStSz_High128
15343 : kIemNativeGstSimdRegLdStSz_Low128,
15344 kIemNativeGstRegUse_ReadOnly);
15345
15346 iemNativeVarSetKindToStack(pReNative, idxDstVar);
15347 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
15348
15349 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
15350
15351 /* Free but don't flush the source register. */
15352 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
15353 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15354
15355 return off;
15356}
15357
15358
15359#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
15360 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
15361
15362/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
15363DECL_INLINE_THROW(uint32_t)
15364iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
15365{
15366 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
15367 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
15368
15369 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
15370 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
15371
15372 /* Free but don't flush the register. */
15373 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
15374
15375 return off;
15376}
15377
15378#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
15379
15380
15381/*********************************************************************************************************************************
15382* The native code generator functions for each MC block. *
15383*********************************************************************************************************************************/
15384
15385/*
15386 * Include instruction emitters.
15387 */
15388#include "target-x86/IEMAllN8veEmit-x86.h"
15389
15390/*
15391 * Include g_apfnIemNativeRecompileFunctions and associated functions.
15392 *
15393 * This should probably live in its own file later, but let's see what the
15394 * compile times turn out to be first.
15395 */
15396#include "IEMNativeFunctions.cpp.h"
15397
15398
15399
15400/*********************************************************************************************************************************
15401* Recompiler Core. *
15402*********************************************************************************************************************************/
15403
15404
15405/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
15406static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
15407{
15408 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
15409 pDis->cbCachedInstr += cbMaxRead;
15410 RT_NOREF(cbMinRead);
15411 return VERR_NO_DATA;
15412}
15413
15414
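/**
 * Maps an offset into VMCPUCC to the name of the member starting at that
 * offset, for use by the debug/disassembly output; returns NULL when the
 * offset is not in the (offset sorted) table below.
 *
 * For instance, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip) should yield
 * "cpum.GstCtx.rip", while any offset into the threaded-function statistics
 * array gets a generic "iem.s.acThreadedFuncStats[iFn]" reply (statistics
 * builds only).
 */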
15415DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
15416{
15417 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
15418 {
15419#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
15420 ENTRY(fLocalForcedActions),
15421 ENTRY(iem.s.rcPassUp),
15422 ENTRY(iem.s.fExec),
15423 ENTRY(iem.s.pbInstrBuf),
15424 ENTRY(iem.s.uInstrBufPc),
15425 ENTRY(iem.s.GCPhysInstrBuf),
15426 ENTRY(iem.s.cbInstrBufTotal),
15427 ENTRY(iem.s.idxTbCurInstr),
15428#ifdef VBOX_WITH_STATISTICS
15429 ENTRY(iem.s.StatNativeTlbHitsForFetch),
15430 ENTRY(iem.s.StatNativeTlbHitsForStore),
15431 ENTRY(iem.s.StatNativeTlbHitsForStack),
15432 ENTRY(iem.s.StatNativeTlbHitsForMapped),
15433 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
15434 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
15435 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
15436 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
15437#endif
15438 ENTRY(iem.s.DataTlb.aEntries),
15439 ENTRY(iem.s.DataTlb.uTlbRevision),
15440 ENTRY(iem.s.DataTlb.uTlbPhysRev),
15441 ENTRY(iem.s.DataTlb.cTlbHits),
15442 ENTRY(iem.s.CodeTlb.aEntries),
15443 ENTRY(iem.s.CodeTlb.uTlbRevision),
15444 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
15445 ENTRY(iem.s.CodeTlb.cTlbHits),
15446 ENTRY(pVMR3),
15447 ENTRY(cpum.GstCtx.rax),
15448 ENTRY(cpum.GstCtx.ah),
15449 ENTRY(cpum.GstCtx.rcx),
15450 ENTRY(cpum.GstCtx.ch),
15451 ENTRY(cpum.GstCtx.rdx),
15452 ENTRY(cpum.GstCtx.dh),
15453 ENTRY(cpum.GstCtx.rbx),
15454 ENTRY(cpum.GstCtx.bh),
15455 ENTRY(cpum.GstCtx.rsp),
15456 ENTRY(cpum.GstCtx.rbp),
15457 ENTRY(cpum.GstCtx.rsi),
15458 ENTRY(cpum.GstCtx.rdi),
15459 ENTRY(cpum.GstCtx.r8),
15460 ENTRY(cpum.GstCtx.r9),
15461 ENTRY(cpum.GstCtx.r10),
15462 ENTRY(cpum.GstCtx.r11),
15463 ENTRY(cpum.GstCtx.r12),
15464 ENTRY(cpum.GstCtx.r13),
15465 ENTRY(cpum.GstCtx.r14),
15466 ENTRY(cpum.GstCtx.r15),
15467 ENTRY(cpum.GstCtx.es.Sel),
15468 ENTRY(cpum.GstCtx.es.u64Base),
15469 ENTRY(cpum.GstCtx.es.u32Limit),
15470 ENTRY(cpum.GstCtx.es.Attr),
15471 ENTRY(cpum.GstCtx.cs.Sel),
15472 ENTRY(cpum.GstCtx.cs.u64Base),
15473 ENTRY(cpum.GstCtx.cs.u32Limit),
15474 ENTRY(cpum.GstCtx.cs.Attr),
15475 ENTRY(cpum.GstCtx.ss.Sel),
15476 ENTRY(cpum.GstCtx.ss.u64Base),
15477 ENTRY(cpum.GstCtx.ss.u32Limit),
15478 ENTRY(cpum.GstCtx.ss.Attr),
15479 ENTRY(cpum.GstCtx.ds.Sel),
15480 ENTRY(cpum.GstCtx.ds.u64Base),
15481 ENTRY(cpum.GstCtx.ds.u32Limit),
15482 ENTRY(cpum.GstCtx.ds.Attr),
15483 ENTRY(cpum.GstCtx.fs.Sel),
15484 ENTRY(cpum.GstCtx.fs.u64Base),
15485 ENTRY(cpum.GstCtx.fs.u32Limit),
15486 ENTRY(cpum.GstCtx.fs.Attr),
15487 ENTRY(cpum.GstCtx.gs.Sel),
15488 ENTRY(cpum.GstCtx.gs.u64Base),
15489 ENTRY(cpum.GstCtx.gs.u32Limit),
15490 ENTRY(cpum.GstCtx.gs.Attr),
15491 ENTRY(cpum.GstCtx.rip),
15492 ENTRY(cpum.GstCtx.eflags),
15493 ENTRY(cpum.GstCtx.uRipInhibitInt),
15494#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15495 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
15496 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
15497 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
15498 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
15499 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
15500 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
15501 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
15502 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
15503 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
15504 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
15505 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
15506 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
15507 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
15508 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
15509 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
15510 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
15511 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
15512 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
15513 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
15514 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
15515 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
15516 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
15517 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
15518 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
15519 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
15520 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
15521 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
15522 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
15523 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
15524 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
15525 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
15526 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
15527#endif
15528#undef ENTRY
15529 };
15530#ifdef VBOX_STRICT
15531 static bool s_fOrderChecked = false;
15532 if (!s_fOrderChecked)
15533 {
15534 s_fOrderChecked = true;
15535 uint32_t offPrev = s_aMembers[0].off;
15536 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
15537 {
15538 Assert(s_aMembers[i].off > offPrev);
15539 offPrev = s_aMembers[i].off;
15540 }
15541 }
15542#endif
15543
15544 /*
15545 * Binary lookup.
15546 */
15547 unsigned iStart = 0;
15548 unsigned iEnd = RT_ELEMENTS(s_aMembers);
15549 for (;;)
15550 {
15551 unsigned const iCur = iStart + (iEnd - iStart) / 2;
15552 uint32_t const offCur = s_aMembers[iCur].off;
15553 if (off < offCur)
15554 {
15555 if (iCur != iStart)
15556 iEnd = iCur;
15557 else
15558 break;
15559 }
15560 else if (off > offCur)
15561 {
15562 if (iCur + 1 < iEnd)
15563 iStart = iCur + 1;
15564 else
15565 break;
15566 }
15567 else
15568 return s_aMembers[iCur].pszName;
15569 }
15570#ifdef VBOX_WITH_STATISTICS
15571 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
15572 return "iem.s.acThreadedFuncStats[iFn]";
15573#endif
15574 return NULL;
15575}
15576
15577
15578/**
15579 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
15580 * @returns pszBuf.
15581 * @param fFlags The flags.
15582 * @param pszBuf The output buffer.
15583 * @param cbBuf The output buffer size. At least 32 bytes.
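 * @note   Illustrative output: "64BIT CPL0 TYPE_NATIVE" for a 64-bit mode,
 *         ring-0 native TB with no other flags set.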
15584 */
15585DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
15586{
15587 Assert(cbBuf >= 32);
15588 static RTSTRTUPLE const s_aModes[] =
15589 {
15590 /* [00] = */ { RT_STR_TUPLE("16BIT") },
15591 /* [01] = */ { RT_STR_TUPLE("32BIT") },
15592 /* [02] = */ { RT_STR_TUPLE("!2!") },
15593 /* [03] = */ { RT_STR_TUPLE("!3!") },
15594 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
15595 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
15596 /* [06] = */ { RT_STR_TUPLE("!6!") },
15597 /* [07] = */ { RT_STR_TUPLE("!7!") },
15598 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
15599 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
15600 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
15601 /* [0b] = */ { RT_STR_TUPLE("!b!") },
15602 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
15603 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
15604 /* [0e] = */ { RT_STR_TUPLE("!e!") },
15605 /* [0f] = */ { RT_STR_TUPLE("!f!") },
15606 /* [10] = */ { RT_STR_TUPLE("!10!") },
15607 /* [11] = */ { RT_STR_TUPLE("!11!") },
15608 /* [12] = */ { RT_STR_TUPLE("!12!") },
15609 /* [13] = */ { RT_STR_TUPLE("!13!") },
15610 /* [14] = */ { RT_STR_TUPLE("!14!") },
15611 /* [15] = */ { RT_STR_TUPLE("!15!") },
15612 /* [16] = */ { RT_STR_TUPLE("!16!") },
15613 /* [17] = */ { RT_STR_TUPLE("!17!") },
15614 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
15615 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
15616 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
15617 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
15618 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
15619 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
15620 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
15621 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
15622 };
15623 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
15624 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
15625 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
15626
15627 pszBuf[off++] = ' ';
15628 pszBuf[off++] = 'C';
15629 pszBuf[off++] = 'P';
15630 pszBuf[off++] = 'L';
15631 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
15632 Assert(off < 32);
15633
15634 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
15635
15636 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
15637 {
15638 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
15639 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
15640 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
15641 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
15642 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
15643 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
15644 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
15645 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
15646 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
15647 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
15648 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
15649 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
15650 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
15651 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
15652 };
15653 if (fFlags)
15654 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
15655 if (s_aFlags[i].fFlag & fFlags)
15656 {
15657 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
15658 pszBuf[off++] = ' ';
15659 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
15660 off += s_aFlags[i].cchName;
15661 fFlags &= ~s_aFlags[i].fFlag;
15662 if (!fFlags)
15663 break;
15664 }
15665 pszBuf[off] = '\0';
15666
15667 return pszBuf;
15668}
15669
15670
15671DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
15672{
15673 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
15674#if defined(RT_ARCH_AMD64)
15675 static const char * const a_apszMarkers[] =
15676 {
15677 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
15678 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
15679 };
15680#endif
15681
15682 char szDisBuf[512];
15683 DISSTATE Dis;
15684 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
15685 uint32_t const cNative = pTb->Native.cInstructions;
15686 uint32_t offNative = 0;
15687#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
15688 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
15689#endif
15690 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
15691 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
15692 : DISCPUMODE_64BIT;
15693#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15694 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
15695#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15696 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
15697#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15698# error "Port me"
15699#else
15700 csh hDisasm = ~(size_t)0;
15701# if defined(RT_ARCH_AMD64)
15702 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
15703# elif defined(RT_ARCH_ARM64)
15704 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
15705# else
15706# error "Port me"
15707# endif
15708 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
15709
15710 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
15711 //Assert(rcCs == CS_ERR_OK);
15712#endif
15713
15714 /*
15715 * Print TB info.
15716 */
15717 pHlp->pfnPrintf(pHlp,
15718 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
15719 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
15720 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
15721 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
15722#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
15723 if (pDbgInfo && pDbgInfo->cEntries > 1)
15724 {
15725 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
15726
15727 /*
15728 * This disassembly is driven by the debug info which follows the native
15729         * code and indicates where the next guest instruction starts, where
15730         * labels are and such things.
15731 */
15732 uint32_t idxThreadedCall = 0;
15733 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
15734 uint8_t idxRange = UINT8_MAX;
15735 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
15736 uint32_t offRange = 0;
15737 uint32_t offOpcodes = 0;
15738 uint32_t const cbOpcodes = pTb->cbOpcodes;
15739 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
15740 uint32_t const cDbgEntries = pDbgInfo->cEntries;
15741 uint32_t iDbgEntry = 1;
15742 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
15743
15744 while (offNative < cNative)
15745 {
15746 /* If we're at or have passed the point where the next chunk of debug
15747 info starts, process it. */
15748 if (offDbgNativeNext <= offNative)
15749 {
15750 offDbgNativeNext = UINT32_MAX;
15751 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
15752 {
15753 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
15754 {
15755 case kIemTbDbgEntryType_GuestInstruction:
15756 {
15757 /* Did the exec flag change? */
15758 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
15759 {
15760 pHlp->pfnPrintf(pHlp,
15761 " fExec change %#08x -> %#08x %s\n",
15762 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
15763 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
15764 szDisBuf, sizeof(szDisBuf)));
15765 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
15766 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
15767 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
15768 : DISCPUMODE_64BIT;
15769 }
15770
15771                            /* New opcode range? We need to fend off a spurious debug info entry here for cases
15772 where the compilation was aborted before the opcode was recorded and the actual
15773 instruction was translated to a threaded call. This may happen when we run out
15774 of ranges, or when some complicated interrupts/FFs are found to be pending or
15775 similar. So, we just deal with it here rather than in the compiler code as it
15776 is a lot simpler to do here. */
15777 if ( idxRange == UINT8_MAX
15778 || idxRange >= cRanges
15779 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
15780 {
15781 idxRange += 1;
15782 if (idxRange < cRanges)
15783 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
15784 else
15785 continue;
15786 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
15787 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
15788 + (pTb->aRanges[idxRange].idxPhysPage == 0
15789 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
15790 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
15791 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
15792 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
15793 pTb->aRanges[idxRange].idxPhysPage);
15794 GCPhysPc += offRange;
15795 }
15796
15797 /* Disassemble the instruction. */
15798 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
15799 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
15800 uint32_t cbInstr = 1;
15801 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
15802 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
15803 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
15804 if (RT_SUCCESS(rc))
15805 {
15806 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
15807 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
15808 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15809 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15810
15811 static unsigned const s_offMarker = 55;
15812 static char const s_szMarker[] = " ; <--- guest";
15813 if (cch < s_offMarker)
15814 {
15815 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
15816 cch = s_offMarker;
15817 }
15818 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
15819 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
15820
15821 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
15822 }
15823 else
15824 {
15825 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
15826 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
15827 cbInstr = 1;
15828 }
15829 GCPhysPc += cbInstr;
15830 offOpcodes += cbInstr;
15831 offRange += cbInstr;
15832 continue;
15833 }
15834
15835 case kIemTbDbgEntryType_ThreadedCall:
15836 pHlp->pfnPrintf(pHlp,
15837 " Call #%u to %s (%u args) - %s\n",
15838 idxThreadedCall,
15839 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
15840 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
15841 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
15842 idxThreadedCall++;
15843 continue;
15844
15845 case kIemTbDbgEntryType_GuestRegShadowing:
15846 {
15847 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
15848 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
15849 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
15850 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
15851 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
15852 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
15853 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
15854 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
15855 else
15856 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
15857 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
15858 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
15859 continue;
15860 }
15861
15862#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15863 case kIemTbDbgEntryType_GuestSimdRegShadowing:
15864 {
15865 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
15866 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
15867 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
15868 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
15869 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
15870 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
15871 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
15872 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
15873 else
15874 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
15875 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
15876 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
15877 continue;
15878 }
15879#endif
15880
15881 case kIemTbDbgEntryType_Label:
15882 {
15883 const char *pszName = "what_the_fudge";
15884 const char *pszComment = "";
15885 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
15886 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
15887 {
15888 case kIemNativeLabelType_Return:
15889 pszName = "Return";
15890 break;
15891 case kIemNativeLabelType_ReturnBreak:
15892 pszName = "ReturnBreak";
15893 break;
15894 case kIemNativeLabelType_ReturnWithFlags:
15895 pszName = "ReturnWithFlags";
15896 break;
15897 case kIemNativeLabelType_NonZeroRetOrPassUp:
15898 pszName = "NonZeroRetOrPassUp";
15899 break;
15900 case kIemNativeLabelType_RaiseGp0:
15901 pszName = "RaiseGp0";
15902 break;
15903 case kIemNativeLabelType_RaiseNm:
15904 pszName = "RaiseNm";
15905 break;
15906 case kIemNativeLabelType_RaiseUd:
15907 pszName = "RaiseUd";
15908 break;
15909 case kIemNativeLabelType_RaiseMf:
15910 pszName = "RaiseMf";
15911 break;
15912 case kIemNativeLabelType_RaiseXf:
15913 pszName = "RaiseXf";
15914 break;
15915 case kIemNativeLabelType_ObsoleteTb:
15916 pszName = "ObsoleteTb";
15917 break;
15918 case kIemNativeLabelType_NeedCsLimChecking:
15919 pszName = "NeedCsLimChecking";
15920 break;
15921 case kIemNativeLabelType_CheckBranchMiss:
15922 pszName = "CheckBranchMiss";
15923 break;
15924 case kIemNativeLabelType_If:
15925 pszName = "If";
15926 fNumbered = true;
15927 break;
15928 case kIemNativeLabelType_Else:
15929 pszName = "Else";
15930 fNumbered = true;
15931 pszComment = " ; regs state restored pre-if-block";
15932 break;
15933 case kIemNativeLabelType_Endif:
15934 pszName = "Endif";
15935 fNumbered = true;
15936 break;
15937 case kIemNativeLabelType_CheckIrq:
15938 pszName = "CheckIrq_CheckVM";
15939 fNumbered = true;
15940 break;
15941 case kIemNativeLabelType_TlbLookup:
15942 pszName = "TlbLookup";
15943 fNumbered = true;
15944 break;
15945 case kIemNativeLabelType_TlbMiss:
15946 pszName = "TlbMiss";
15947 fNumbered = true;
15948 break;
15949 case kIemNativeLabelType_TlbDone:
15950 pszName = "TlbDone";
15951 fNumbered = true;
15952 break;
15953 case kIemNativeLabelType_Invalid:
15954 case kIemNativeLabelType_End:
15955 break;
15956 }
15957 if (fNumbered)
15958 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
15959 else
15960 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
15961 continue;
15962 }
15963
15964 case kIemTbDbgEntryType_NativeOffset:
15965 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
15966 Assert(offDbgNativeNext > offNative);
15967 break;
15968
15969#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
15970 case kIemTbDbgEntryType_DelayedPcUpdate:
15971 pHlp->pfnPrintf(pHlp,
15972 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
15973 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
15974 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
15975 continue;
15976#endif
15977
15978 default:
15979 AssertFailed();
15980 }
15981 iDbgEntry++;
15982 break;
15983 }
15984 }
15985
15986 /*
15987 * Disassemble the next native instruction.
15988 */
15989 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
15990# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
15991 uint32_t cbInstr = sizeof(paNative[0]);
15992 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
15993 if (RT_SUCCESS(rc))
15994 {
15995# if defined(RT_ARCH_AMD64)
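                /* iemNativeEmitMarker() emits a 7-byte NOP whose last four bytes hold a 32-bit
                   payload.  For per-call markers the payload is built by iemNativeRecompile()
                   (VBOX_STRICT only) as RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0),
                   enmFunction), which is what the uInfo decoding below relies on; other values
                   (with bit 31 masked off) are matched against the a_apszMarkers table. */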
15996 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
15997 {
15998 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
15999 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
16000 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
16001 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
16002 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
16003 uInfo & 0x8000 ? "recompiled" : "todo");
16004 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
16005 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
16006 else
16007 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
16008 }
16009 else
16010# endif
16011 {
16012 const char *pszAnnotation = NULL;
16013# ifdef RT_ARCH_AMD64
16014 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
16015 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
16016 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
16017 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
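                    /* Try to annotate memory operands addressed relative to the fixed pVCpu
                       register (IEMNATIVE_REG_FIXED_PVMCPU) with the name of the VMCPU field
                       being accessed, looking the displacement up via iemNativeDbgVCpuOffsetToName. */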
16018 PCDISOPPARAM pMemOp;
16019 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
16020 pMemOp = &Dis.Param1;
16021 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
16022 pMemOp = &Dis.Param2;
16023 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
16024 pMemOp = &Dis.Param3;
16025 else
16026 pMemOp = NULL;
16027 if ( pMemOp
16028 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
16029 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
16030 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
16031 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
16032
16033# elif defined(RT_ARCH_ARM64)
16034 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
16035 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
16036 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
16037# else
16038# error "Port me"
16039# endif
16040 if (pszAnnotation)
16041 {
16042 static unsigned const s_offAnnotation = 55;
16043 size_t const cchAnnotation = strlen(pszAnnotation);
16044 size_t cchDis = strlen(szDisBuf);
16045 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
16046 {
16047 if (cchDis < s_offAnnotation)
16048 {
16049 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
16050 cchDis = s_offAnnotation;
16051 }
16052 szDisBuf[cchDis++] = ' ';
16053 szDisBuf[cchDis++] = ';';
16054 szDisBuf[cchDis++] = ' ';
16055 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
16056 }
16057 }
16058 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
16059 }
16060 }
16061 else
16062 {
16063# if defined(RT_ARCH_AMD64)
16064 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
16065 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
16066# elif defined(RT_ARCH_ARM64)
16067 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
16068# else
16069# error "Port me"
16070# endif
16071 cbInstr = sizeof(paNative[0]);
16072 }
16073 offNative += cbInstr / sizeof(paNative[0]);
16074
16075# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
16076 cs_insn *pInstr;
16077 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
16078 (uintptr_t)pNativeCur, 1, &pInstr);
16079 if (cInstrs > 0)
16080 {
16081 Assert(cInstrs == 1);
16082 const char *pszAnnotation = NULL;
16083# if defined(RT_ARCH_ARM64)
16084 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
16085 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
16086 {
16087                    /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
16088 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
16089 char *psz = strchr(pInstr->op_str, '[');
16090 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
16091 {
16092                        uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
16093 int32_t off = -1;
16094 psz += 4;
16095 if (*psz == ']')
16096 off = 0;
16097 else if (*psz == ',')
16098 {
16099 psz = RTStrStripL(psz + 1);
16100 if (*psz == '#')
16101 off = RTStrToInt32(&psz[1]);
16102 /** @todo deal with index registers and LSL as well... */
16103 }
16104 if (off >= 0)
16105 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
16106 }
16107 }
16108# endif
16109
16110 size_t const cchOp = strlen(pInstr->op_str);
16111# if defined(RT_ARCH_AMD64)
16112 if (pszAnnotation)
16113 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
16114 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
16115 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
16116 else
16117 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
16118 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
16119
16120# else
16121 if (pszAnnotation)
16122 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
16123 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
16124 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
16125 else
16126 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
16127 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
16128# endif
16129 offNative += pInstr->size / sizeof(*pNativeCur);
16130 cs_free(pInstr, cInstrs);
16131 }
16132 else
16133 {
16134# if defined(RT_ARCH_AMD64)
16135 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
16136                            pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
16137# else
16138 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
16139# endif
16140 offNative++;
16141 }
16142# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
16143 }
16144 }
16145 else
16146#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
16147 {
16148 /*
16149 * No debug info, just disassemble the x86 code and then the native code.
16150 *
16151 * First the guest code:
16152 */
16153 for (unsigned i = 0; i < pTb->cRanges; i++)
16154 {
16155 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
16156 + (pTb->aRanges[i].idxPhysPage == 0
16157 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
16158 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
16159 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
16160 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
16161 unsigned off = pTb->aRanges[i].offOpcodes;
16162 /** @todo this ain't working when crossing pages! */
16163 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
16164 while (off < cbOpcodes)
16165 {
16166 uint32_t cbInstr = 1;
16167 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
16168 &pTb->pabOpcodes[off], cbOpcodes - off,
16169 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
16170 if (RT_SUCCESS(rc))
16171 {
16172 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
16173 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
16174 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
16175 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
16176 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
16177 GCPhysPc += cbInstr;
16178 off += cbInstr;
16179 }
16180 else
16181 {
16182 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
16183 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
16184 break;
16185 }
16186 }
16187 }
16188
16189 /*
16190 * Then the native code:
16191 */
16192 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
16193 while (offNative < cNative)
16194 {
16195 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
16196# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
16197 uint32_t cbInstr = sizeof(paNative[0]);
16198 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
16199 if (RT_SUCCESS(rc))
16200 {
16201# if defined(RT_ARCH_AMD64)
16202 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
16203 {
16204 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
16205 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
16206 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
16207 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
16208 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
16209 uInfo & 0x8000 ? "recompiled" : "todo");
16210 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
16211 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
16212 else
16213 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
16214 }
16215 else
16216# endif
16217 {
16218# ifdef RT_ARCH_AMD64
16219 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
16220 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
16221 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
16222 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
16223# elif defined(RT_ARCH_ARM64)
16224 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
16225 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
16226 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
16227# else
16228# error "Port me"
16229# endif
16230 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
16231 }
16232 }
16233 else
16234 {
16235# if defined(RT_ARCH_AMD64)
16236 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
16237 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
16238# else
16239 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
16240# endif
16241 cbInstr = sizeof(paNative[0]);
16242 }
16243 offNative += cbInstr / sizeof(paNative[0]);
16244
16245# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
16246 cs_insn *pInstr;
16247 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
16248 (uintptr_t)pNativeCur, 1, &pInstr);
16249 if (cInstrs > 0)
16250 {
16251 Assert(cInstrs == 1);
16252# if defined(RT_ARCH_AMD64)
16253 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
16254 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
16255# else
16256 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
16257 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
16258# endif
16259 offNative += pInstr->size / sizeof(*pNativeCur);
16260 cs_free(pInstr, cInstrs);
16261 }
16262 else
16263 {
16264# if defined(RT_ARCH_AMD64)
16265 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
16266                                pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
16267# else
16268 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
16269# endif
16270 offNative++;
16271 }
16272# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
16273 }
16274 }
16275
16276#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
16277 /* Cleanup. */
16278 cs_close(&hDisasm);
16279#endif
16280}
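
/*
 * Illustrative usage note: iemNativeRecompile() below invokes this as
 *      iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
 * when Log3 is enabled, sending the annotated disassembly to the log.  Any other
 * DBGF info helper (e.g. DBGFR3InfoLogRelHlp()) can be passed instead, as hinted
 * by the commented-out call near the end of iemNativeRecompile().
 */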
16281
16282
16283/**
16284 * Recompiles the given threaded TB into a native one.
16285 *
16286 * In case of failure the translation block will be returned as-is.
16287 *
16288 * @returns pTb.
16289 * @param pVCpu The cross context virtual CPU structure of the calling
16290 * thread.
16291 * @param   pTb     The threaded translation block to recompile into a native one.
16292 */
16293DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
16294{
16295 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
16296
16297 /*
16298     * The first time thru, we allocate the recompiler state; the other times
16299 * we just need to reset it before using it again.
16300 */
16301 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
16302 if (RT_LIKELY(pReNative))
16303 iemNativeReInit(pReNative, pTb);
16304 else
16305 {
16306 pReNative = iemNativeInit(pVCpu, pTb);
16307 AssertReturn(pReNative, pTb);
16308 }
16309
16310#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
16311 /*
16312 * First do liveness analysis. This is done backwards.
16313 */
16314 {
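        /* One liveness entry is produced per call.  The last entry is initialised as
           'all unused' and the loop further down walks the calls in reverse, deriving
           entry N-1 from entry N either through the call's dedicated liveness function
           or, when none is registered, through the generic exception/call initialiser. */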
16315 uint32_t idxCall = pTb->Thrd.cCalls;
16316 if (idxCall <= pReNative->cLivenessEntriesAlloc)
16317 { /* likely */ }
16318 else
16319 {
16320 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
16321 while (idxCall > cAlloc)
16322 cAlloc *= 2;
16323 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
16324 AssertReturn(pvNew, pTb);
16325 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
16326 pReNative->cLivenessEntriesAlloc = cAlloc;
16327 }
16328 AssertReturn(idxCall > 0, pTb);
16329 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
16330
16331 /* The initial (final) entry. */
16332 idxCall--;
16333 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
16334
16335 /* Loop backwards thru the calls and fill in the other entries. */
16336 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
16337 while (idxCall > 0)
16338 {
16339 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
16340 if (pfnLiveness)
16341 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
16342 else
16343 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
16344 pCallEntry--;
16345 idxCall--;
16346 }
16347
16348# ifdef VBOX_WITH_STATISTICS
16349        /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
16350           to 'clobbered' rather than 'input'. */
16351 /** @todo */
16352# endif
16353 }
16354#endif
16355
16356 /*
16357 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
16358 * for aborting if an error happens.
16359 */
16360 uint32_t cCallsLeft = pTb->Thrd.cCalls;
16361#ifdef LOG_ENABLED
16362 uint32_t const cCallsOrg = cCallsLeft;
16363#endif
16364 uint32_t off = 0;
16365 int rc = VINF_SUCCESS;
16366 IEMNATIVE_TRY_SETJMP(pReNative, rc)
16367 {
16368 /*
16369 * Emit prolog code (fixed).
16370 */
16371 off = iemNativeEmitProlog(pReNative, off);
16372
16373 /*
16374 * Convert the calls to native code.
16375 */
16376#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16377 int32_t iGstInstr = -1;
16378#endif
16379#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
16380 uint32_t cThreadedCalls = 0;
16381 uint32_t cRecompiledCalls = 0;
16382#endif
16383#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
16384 uint32_t idxCurCall = 0;
16385#endif
16386 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
16387 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
16388 while (cCallsLeft-- > 0)
16389 {
16390 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
16391#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
16392 pReNative->idxCurCall = idxCurCall;
16393#endif
16394
16395 /*
16396 * Debug info, assembly markup and statistics.
16397 */
16398#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
16399 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
16400 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
16401#endif
16402#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16403 iemNativeDbgInfoAddNativeOffset(pReNative, off);
16404 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
16405 {
16406 if (iGstInstr < (int32_t)pTb->cInstructions)
16407 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
16408 else
16409 Assert(iGstInstr == pTb->cInstructions);
16410 iGstInstr = pCallEntry->idxInstr;
16411 }
16412 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
16413#endif
16414#if defined(VBOX_STRICT)
16415 off = iemNativeEmitMarker(pReNative, off,
16416 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
16417#endif
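            /* The payload above is what the OP_NOP marker handling in iemNativeDisassembleTb()
               decodes (AMD64 host only). */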
16418#if defined(VBOX_STRICT)
16419 iemNativeRegAssertSanity(pReNative);
16420#endif
16421#ifdef VBOX_WITH_STATISTICS
16422 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
16423#endif
16424
16425 /*
16426 * Actual work.
16427 */
16428 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
16429 pfnRecom ? "(recompiled)" : "(todo)"));
16430 if (pfnRecom) /** @todo stats on this. */
16431 {
16432 off = pfnRecom(pReNative, off, pCallEntry);
16433 STAM_REL_STATS({cRecompiledCalls++;});
16434 }
16435 else
16436 {
16437 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
16438 STAM_REL_STATS({cThreadedCalls++;});
16439 }
16440 Assert(off <= pReNative->cInstrBufAlloc);
16441 Assert(pReNative->cCondDepth == 0);
16442
16443#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
16444 if (LogIs2Enabled())
16445 {
16446 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
16447# ifndef IEMLIVENESS_EXTENDED_LAYOUT
16448 static const char s_achState[] = "CUXI";
16449# else
16450 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
16451# endif
16452
16453 char szGpr[17];
16454 for (unsigned i = 0; i < 16; i++)
16455 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
16456 szGpr[16] = '\0';
16457
16458 char szSegBase[X86_SREG_COUNT + 1];
16459 char szSegLimit[X86_SREG_COUNT + 1];
16460 char szSegAttrib[X86_SREG_COUNT + 1];
16461 char szSegSel[X86_SREG_COUNT + 1];
16462 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
16463 {
16464 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
16465 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
16466 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
16467 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
16468 }
16469 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
16470 = szSegSel[X86_SREG_COUNT] = '\0';
16471
16472 char szEFlags[8];
16473 for (unsigned i = 0; i < 7; i++)
16474 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
16475 szEFlags[7] = '\0';
16476
16477                Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
16478 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
16479 }
16480#endif
16481
16482 /*
16483 * Advance.
16484 */
16485 pCallEntry++;
16486#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
16487 idxCurCall++;
16488#endif
16489 }
16490
16491 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
16492 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
16493 if (!cThreadedCalls)
16494 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
16495
16496 /*
16497 * Emit the epilog code.
16498 */
16499 uint32_t idxReturnLabel;
16500 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
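        /* idxReturnLabel identifies the common return label defined by the epilog.  It is passed
           to each of the special tail emitters below, which are only generated when the
           corresponding label type was actually requested (see the bmLabelTypes checks). */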
16501
16502 /*
16503 * Generate special jump labels.
16504 */
16505 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
16506 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
16507 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
16508 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
16509 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
16510 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
16511 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseNm))
16512 off = iemNativeEmitRaiseNm(pReNative, off, idxReturnLabel);
16513 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseUd))
16514 off = iemNativeEmitRaiseUd(pReNative, off, idxReturnLabel);
16515 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseMf))
16516 off = iemNativeEmitRaiseMf(pReNative, off, idxReturnLabel);
16517 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseXf))
16518 off = iemNativeEmitRaiseXf(pReNative, off, idxReturnLabel);
16519 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
16520 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
16521 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
16522 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
16523 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
16524 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
16525 }
16526 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
16527 {
16528 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
16529 return pTb;
16530 }
16531 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
16532 Assert(off <= pReNative->cInstrBufAlloc);
16533
16534 /*
16535     * Make sure all labels have been defined.
16536 */
16537 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
16538#ifdef VBOX_STRICT
16539 uint32_t const cLabels = pReNative->cLabels;
16540 for (uint32_t i = 0; i < cLabels; i++)
16541 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
16542#endif
16543
16544 /*
16545 * Allocate executable memory, copy over the code we've generated.
16546 */
16547 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
16548 if (pTbAllocator->pDelayedFreeHead)
16549 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
16550
16551 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
16552 AssertReturn(paFinalInstrBuf, pTb);
16553 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
16554
16555 /*
16556 * Apply fixups.
16557 */
16558 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
16559 uint32_t const cFixups = pReNative->cFixups;
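    /* Each fixup record holds the instruction-buffer offset to patch (off), the label it
       references (idxLabel), an addend (offAddend) and a type.  The relative distance is
       always label-offset minus fixup-offset plus addend; e.g. a label at offset 0x40
       referenced from a fixup at 0x30 with a zero addend yields a displacement of 0x10
       IEMNATIVEINSTR units, which is then encoded according to the fixup type (a 32-bit
       displacement on AMD64/x86, the imm26/imm19/imm14 branch fields on ARM64). */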
16560 for (uint32_t i = 0; i < cFixups; i++)
16561 {
16562 Assert(paFixups[i].off < off);
16563 Assert(paFixups[i].idxLabel < cLabels);
16564 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
16565 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
16566 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
16567 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
16568 switch (paFixups[i].enmType)
16569 {
16570#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
16571 case kIemNativeFixupType_Rel32:
16572 Assert(paFixups[i].off + 4 <= off);
16573 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16574 continue;
16575
16576#elif defined(RT_ARCH_ARM64)
16577 case kIemNativeFixupType_RelImm26At0:
16578 {
16579 Assert(paFixups[i].off < off);
16580 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16581 Assert(offDisp >= -262144 && offDisp < 262144);
16582 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
16583 continue;
16584 }
16585
16586 case kIemNativeFixupType_RelImm19At5:
16587 {
16588 Assert(paFixups[i].off < off);
16589 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16590 Assert(offDisp >= -262144 && offDisp < 262144);
16591 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
16592 continue;
16593 }
16594
16595 case kIemNativeFixupType_RelImm14At5:
16596 {
16597 Assert(paFixups[i].off < off);
16598 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16599 Assert(offDisp >= -8192 && offDisp < 8192);
16600 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
16601 continue;
16602 }
16603
16604#endif
16605 case kIemNativeFixupType_Invalid:
16606 case kIemNativeFixupType_End:
16607 break;
16608 }
16609 AssertFailed();
16610 }
16611
16612 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
16613 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
16614
16615 /*
16616 * Convert the translation block.
16617 */
16618 RTMemFree(pTb->Thrd.paCalls);
16619 pTb->Native.paInstructions = paFinalInstrBuf;
16620 pTb->Native.cInstructions = off;
16621 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
16622#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16623    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
16624 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
16625#endif
16626
16627 Assert(pTbAllocator->cThreadedTbs > 0);
16628 pTbAllocator->cThreadedTbs -= 1;
16629 pTbAllocator->cNativeTbs += 1;
16630 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
16631
16632#ifdef LOG_ENABLED
16633 /*
16634 * Disassemble to the log if enabled.
16635 */
16636 if (LogIs3Enabled())
16637 {
16638 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
16639 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
16640# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
16641 RTLogFlush(NULL);
16642# endif
16643 }
16644#endif
16645 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
16646
16647 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
16648 return pTb;
16649}
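
/*
 * Illustrative note: since a failed recompilation returns the threaded TB unchanged, a
 * caller can tell the outcome apart by re-checking the TB type afterwards, e.g.:
 *      pTb = iemNativeRecompile(pVCpu, pTb);
 *      if ((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
 *          ...; // recompilation succeeded
 * (the type flag is switched to IEMTB_F_TYPE_NATIVE just before the TB allocator accounting
 * above).
 */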
16650